summaryrefslogtreecommitdiff
path: root/src/tree_builder
diff options
context:
space:
mode:
authorMartin Fischer <martin@push-f.com>2021-04-08 08:42:01 +0200
committerMartin Fischer <martin@push-f.com>2021-04-08 15:40:37 +0200
commit57e7eefcbe6fb8c3dc4b01c707be9de4c34963a7 (patch)
tree6a9d296389bf3023396592c8514ed6712e011c7f /src/tree_builder
import https://github.com/servo/html5ever
commit d1206daa740305f55a5fa159e43eb33afc359cb4
Diffstat (limited to 'src/tree_builder')
-rw-r--r--src/tree_builder/data.rs171
-rw-r--r--src/tree_builder/mod.rs1681
-rw-r--r--src/tree_builder/rules.rs1449
-rw-r--r--src/tree_builder/tag_sets.rs115
-rw-r--r--src/tree_builder/types.rs95
5 files changed, 3511 insertions, 0 deletions
diff --git a/src/tree_builder/data.rs b/src/tree_builder/data.rs
new file mode 100644
index 0000000..9d51a71
--- /dev/null
+++ b/src/tree_builder/data.rs
@@ -0,0 +1,171 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};
+use crate::tendril::StrTendril;
+use crate::tokenizer::Doctype;
+
+// Public-identifier prefixes that put the document in quirks mode; they are
+// checked with the prefix lookup in `doctype_error_and_quirks` below.
+// These should all be lowercase, for ASCII-case-insensitive matching
+// (the identifier is ASCII-lowercased before it is compared).
+static QUIRKY_PUBLIC_PREFIXES: &'static [&'static str] = &[
+ "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+ "-//as//dtd html 3.0 aswedit + extensions//",
+ "-//ietf//dtd html 2.0 level 1//",
+ "-//ietf//dtd html 2.0 level 2//",
+ "-//ietf//dtd html 2.0 strict level 1//",
+ "-//ietf//dtd html 2.0 strict level 2//",
+ "-//ietf//dtd html 2.0 strict//",
+ "-//ietf//dtd html 2.0//",
+ "-//ietf//dtd html 2.1e//",
+ "-//ietf//dtd html 3.0//",
+ "-//ietf//dtd html 3.2 final//",
+ "-//ietf//dtd html 3.2//",
+ "-//ietf//dtd html 3//",
+ "-//ietf//dtd html level 0//",
+ "-//ietf//dtd html level 1//",
+ "-//ietf//dtd html level 2//",
+ "-//ietf//dtd html level 3//",
+ "-//ietf//dtd html strict level 0//",
+ "-//ietf//dtd html strict level 1//",
+ "-//ietf//dtd html strict level 2//",
+ "-//ietf//dtd html strict level 3//",
+ "-//ietf//dtd html strict//",
+ "-//ietf//dtd html//",
+ "-//metrius//dtd metrius presentational//",
+ "-//microsoft//dtd internet explorer 2.0 html strict//",
+ "-//microsoft//dtd internet explorer 2.0 html//",
+ "-//microsoft//dtd internet explorer 2.0 tables//",
+ "-//microsoft//dtd internet explorer 3.0 html strict//",
+ "-//microsoft//dtd internet explorer 3.0 html//",
+ "-//microsoft//dtd internet explorer 3.0 tables//",
+ "-//netscape comm. corp.//dtd html//",
+ "-//netscape comm. corp.//dtd strict html//",
+ "-//o'reilly and associates//dtd html 2.0//",
+ "-//o'reilly and associates//dtd html extended 1.0//",
+ "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+ "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+ "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+ "-//spyglass//dtd html 2.0 extended//",
+ "-//sq//dtd html 2.0 hotmetal + extensions//",
+ "-//sun microsystems corp.//dtd hotjava html//",
+ "-//sun microsystems corp.//dtd hotjava strict html//",
+ "-//w3c//dtd html 3 1995-03-24//",
+ "-//w3c//dtd html 3.2 draft//",
+ "-//w3c//dtd html 3.2 final//",
+ "-//w3c//dtd html 3.2//",
+ "-//w3c//dtd html 3.2s draft//",
+ "-//w3c//dtd html 4.0 frameset//",
+ "-//w3c//dtd html 4.0 transitional//",
+ "-//w3c//dtd html experimental 19960712//",
+ "-//w3c//dtd html experimental 970421//",
+ "-//w3c//dtd w3 html//",
+ "-//w3o//dtd w3 html 3.0//",
+ "-//webtechs//dtd mozilla html 2.0//",
+ "-//webtechs//dtd mozilla html//",
+];
+
+// Public identifiers that select quirks mode when the whole (ASCII-lowercased)
+// identifier equals one of these entries.
+static QUIRKY_PUBLIC_MATCHES: &'static [&'static str] = &[
+ "-//w3o//dtd w3 html strict 3.0//en//",
+ "-/w3c/dtd html 4.0 transitional/en",
+ "html",
+];
+
+// System identifiers that select quirks mode on an exact (lowercased) match.
+static QUIRKY_SYSTEM_MATCHES: &'static [&'static str] =
+ &["http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"];
+
+// Public-identifier prefixes that select limited-quirks mode.
+static LIMITED_QUIRKY_PUBLIC_PREFIXES: &'static [&'static str] = &[
+ "-//w3c//dtd xhtml 1.0 frameset//",
+ "-//w3c//dtd xhtml 1.0 transitional//",
+];
+
+// Public-identifier prefixes whose outcome depends on the system identifier:
+// quirks mode when it is missing, limited-quirks mode when it is present.
+static HTML4_PUBLIC_PREFIXES: &'static [&'static str] = &[
+ "-//w3c//dtd html 4.01 frameset//",
+ "-//w3c//dtd html 4.01 transitional//",
+];
+
+/// Classify a DOCTYPE token.
+///
+/// Returns `(err, quirk)`:
+///
+/// * `err` is `false` only when the name is `html` and the public / system
+///   identifier pair is one of the combinations listed below; any other
+///   DOCTYPE is reported as an error.
+/// * `quirk` is the quirks mode selected by the token. A forced-quirks flag
+///   or a name other than `html` always yields `Quirks`; otherwise an
+///   `iframe_srcdoc` document is always `NoQuirks`; otherwise the lowercased
+///   identifiers are looked up in the tables above.
+pub fn doctype_error_and_quirks(doctype: &Doctype, iframe_srcdoc: bool) -> (bool, QuirksMode) {
+    // FIXME: We could do something asymptotically faster here.
+    // But there aren't many strings, and this happens at most once per parse.
+    fn any_prefix_of(prefixes: &[&str], id: &str) -> bool {
+        prefixes.iter().any(|&pfx| id.starts_with(pfx))
+    }
+
+    // Borrow the optional tendrils as plain string slices.
+    let name: Option<&str> = doctype.name.as_ref().map(|t| &**t);
+    let public: Option<&str> = doctype.public_id.as_ref().map(|t| &**t);
+    let system: Option<&str> = doctype.system_id.as_ref().map(|t| &**t);
+
+    // The error check is case-sensitive and uses the identifiers verbatim.
+    let err = if name != Some("html") {
+        true
+    } else {
+        match (public, system) {
+            (None, None) |
+            (None, Some("about:legacy-compat")) |
+            (Some("-//W3C//DTD HTML 4.0//EN"), None) |
+            (
+                Some("-//W3C//DTD HTML 4.0//EN"),
+                Some("http://www.w3.org/TR/REC-html40/strict.dtd"),
+            ) |
+            (Some("-//W3C//DTD HTML 4.01//EN"), None) |
+            (
+                Some("-//W3C//DTD HTML 4.01//EN"),
+                Some("http://www.w3.org/TR/html4/strict.dtd"),
+            ) |
+            (
+                Some("-//W3C//DTD XHTML 1.0 Strict//EN"),
+                Some("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"),
+            ) |
+            (
+                Some("-//W3C//DTD XHTML 1.1//EN"),
+                Some("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"),
+            ) => false,
+
+            _ => true,
+        }
+    };
+
+    // Quirks-mode matches are case-insensitive.
+    let public_lower: Option<String> = public.map(str::to_ascii_lowercase);
+    let system_lower: Option<String> = system.map(str::to_ascii_lowercase);
+
+    let quirk = if doctype.force_quirks || name != Some("html") {
+        Quirks
+    } else if iframe_srcdoc {
+        NoQuirks
+    } else {
+        let public_id = public_lower.as_ref().map(String::as_str);
+        let system_id = system_lower.as_ref().map(String::as_str);
+        match (public_id, system_id) {
+            (Some(p), _) if QUIRKY_PUBLIC_MATCHES.contains(&p) => Quirks,
+            (_, Some(s)) if QUIRKY_SYSTEM_MATCHES.contains(&s) => Quirks,
+
+            (Some(p), _) if any_prefix_of(QUIRKY_PUBLIC_PREFIXES, p) => Quirks,
+            (Some(p), _) if any_prefix_of(LIMITED_QUIRKY_PUBLIC_PREFIXES, p) => LimitedQuirks,
+
+            // HTML 4.01 frameset / transitional: the system id decides.
+            (Some(p), sys) if any_prefix_of(HTML4_PUBLIC_PREFIXES, p) => {
+                if sys.is_none() {
+                    Quirks
+                } else {
+                    LimitedQuirks
+                }
+            },
+
+            _ => NoQuirks,
+        }
+    };
+
+    (err, quirk)
+}
diff --git a/src/tree_builder/mod.rs b/src/tree_builder/mod.rs
new file mode 100644
index 0000000..a6fa8bf
--- /dev/null
+++ b/src/tree_builder/mod.rs
@@ -0,0 +1,1681 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![allow(warnings)]
+
+//! The HTML5 tree builder.
+
+pub use crate::interface::{create_element, ElementFlags, NextParserState, Tracer, TreeSink};
+pub use crate::interface::{AppendNode, AppendText, Attribute, NodeOrText};
+pub use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};
+
+use self::types::*;
+
+use crate::tendril::StrTendril;
+use crate::{ExpandedName, LocalName, Namespace, QualName};
+
+use crate::tokenizer;
+use crate::tokenizer::states as tok_state;
+use crate::tokenizer::{Doctype, EndTag, StartTag, Tag, TokenSink, TokenSinkResult};
+
+use crate::util::str::is_ascii_whitespace;
+
+use std::borrow::Cow::Borrowed;
+use std::collections::VecDeque;
+use std::default::Default;
+use std::iter::{Enumerate, Rev};
+use std::mem::replace;
+use std::{fmt, slice};
+
+use crate::tokenizer::states::{RawData, RawKind};
+use crate::tree_builder::tag_sets::*;
+use crate::tree_builder::types::*;
+use crate::util::str::to_escaped_string;
+use log::{debug, log_enabled, warn, Level};
+use mac::{_tt_as_expr_hack, format_if, matches};
+
+pub use self::PushFlag::*;
+
+#[macro_use]
+mod tag_sets;
+
+mod data;
+mod types;
+
+include!(concat!(env!("OUT_DIR"), "/rules.rs"));
+
+/// Tree builder options, with an impl for Default.
+///
+/// The options are `Copy`, so a `TreeBuilder` keeps its own copy of them.
+#[derive(Copy, Clone)]
+pub struct TreeBuilderOpts {
+ /// Report all parse errors described in the spec, at some
+ /// performance penalty? Default: false
+ pub exact_errors: bool,
+
+ /// Is scripting enabled? Default: true
+ pub scripting_enabled: bool,
+
+ /// Is this an `iframe srcdoc` document? Default: false
+ pub iframe_srcdoc: bool,
+
+ /// Should we drop the DOCTYPE (if any) from the tree? Default: false
+ pub drop_doctype: bool,
+
+ /// Obsolete, ignored. Default: false
+ pub ignore_missing_rules: bool,
+
+ /// Initial TreeBuilder quirks mode. Default: NoQuirks
+ pub quirks_mode: QuirksMode,
+}
+
+impl Default for TreeBuilderOpts {
+    /// Default options: scripting enabled, no-quirks mode, every other flag off.
+    fn default() -> TreeBuilderOpts {
+        Self {
+            // The only flag that is on by default.
+            scripting_enabled: true,
+            quirks_mode: NoQuirks,
+            exact_errors: false,
+            iframe_srcdoc: false,
+            drop_doctype: false,
+            ignore_missing_rules: false,
+        }
+    }
+}
+
+/// The HTML tree builder.
+///
+/// Holds the parser state (insertion mode, stack of open elements, list of
+/// active formatting elements, ...) and forwards tree modifications to `sink`.
+pub struct TreeBuilder<Handle, Sink> {
+ /// Options controlling the behavior of the tree builder.
+ opts: TreeBuilderOpts,
+
+ /// Consumer of tree modifications.
+ pub sink: Sink,
+
+ /// Insertion mode.
+ mode: InsertionMode,
+
+ /// Original insertion mode, used by Text and InTableText modes.
+ orig_mode: Option<InsertionMode>,
+
+ /// Stack of template insertion modes.
+ template_modes: Vec<InsertionMode>,
+
+ /// Pending table character tokens.
+ pending_table_text: Vec<(SplitStatus, StrTendril)>,
+
+ /// Quirks mode as set by the parser.
+ /// Starts out as `opts.quirks_mode`.
+ /// FIXME: can scripts etc. change this?
+ quirks_mode: QuirksMode,
+
+ /// The document node, which is created by the sink.
+ doc_handle: Handle,
+
+ /// Stack of open elements, most recently added at end.
+ open_elems: Vec<Handle>,
+
+ /// List of active formatting elements.
+ active_formatting: Vec<FormatEntry<Handle>>,
+
+ //§ the-element-pointers
+ /// Head element pointer.
+ head_elem: Option<Handle>,
+
+ /// Form element pointer.
+ form_elem: Option<Handle>,
+ //§ END
+ /// Frameset-ok flag. Starts out `true`.
+ frameset_ok: bool,
+
+ /// Ignore a following U+000A LINE FEED?
+ ignore_lf: bool,
+
+ /// Is foster parenting enabled?
+ foster_parenting: bool,
+
+ /// The context element for the fragment parsing algorithm.
+ /// `None` unless the builder was created for fragment parsing.
+ context_elem: Option<Handle>,
+
+ /// Track current line. Starts at 1.
+ current_line: u64,
+ // WARNING: If you add new fields that contain Handles, you
+ // must add them to trace_handles() below to preserve memory
+ // safety!
+ //
+ // FIXME: Auto-generate the trace hooks like Servo does.
+}
+
+impl<Handle, Sink> TreeBuilder<Handle, Sink>
+where
+ Handle: Clone,
+ Sink: TreeSink<Handle = Handle>,
+{
+    /// Build a tree builder for parsing a whole document.
+    ///
+    /// The document node is requested from `sink` up front; every other piece
+    /// of state starts empty, in the `Initial` insertion mode, with the quirks
+    /// mode taken from `opts`.
+    ///
+    /// The tree builder is also a `TokenSink`.
+    pub fn new(mut sink: Sink, opts: TreeBuilderOpts) -> TreeBuilder<Handle, Sink> {
+        let doc_handle = sink.get_document();
+        let quirks_mode = opts.quirks_mode;
+        TreeBuilder {
+            opts,
+            sink,
+            mode: Initial,
+            orig_mode: None,
+            template_modes: Vec::new(),
+            pending_table_text: Vec::new(),
+            quirks_mode,
+            doc_handle,
+            open_elems: Vec::new(),
+            active_formatting: Vec::new(),
+            head_elem: None,
+            form_elem: None,
+            frameset_ok: true,
+            ignore_lf: false,
+            foster_parenting: false,
+            context_elem: None,
+            current_line: 1,
+        }
+    }
+
+    /// Build a tree builder for parsing a fragment in the context of
+    /// `context_elem`.
+    ///
+    /// `form_elem` becomes the initial form element pointer. When the context
+    /// element is an HTML `template`, the stack of template insertion modes
+    /// starts with `InTemplate`.
+    ///
+    /// The tree builder is also a `TokenSink`.
+    pub fn new_for_fragment(
+        mut sink: Sink,
+        context_elem: Handle,
+        form_elem: Option<Handle>,
+        opts: TreeBuilderOpts,
+    ) -> TreeBuilder<Handle, Sink> {
+        let doc_handle = sink.get_document();
+        let context_is_template = sink.elem_name(&context_elem) == expanded_name!(html "template");
+        let template_modes = if context_is_template {
+            vec![InTemplate]
+        } else {
+            Vec::new()
+        };
+        let quirks_mode = opts.quirks_mode;
+
+        let mut builder = TreeBuilder {
+            opts,
+            sink,
+            mode: Initial,
+            orig_mode: None,
+            template_modes,
+            pending_table_text: Vec::new(),
+            quirks_mode,
+            doc_handle,
+            open_elems: Vec::new(),
+            active_formatting: Vec::new(),
+            head_elem: None,
+            form_elem,
+            frameset_ok: true,
+            ignore_lf: false,
+            foster_parenting: false,
+            context_elem: Some(context_elem),
+            current_line: 1,
+        };
+
+        // https://html.spec.whatwg.org/multipage/#parsing-html-fragments
+        // 5. Let root be a new html element with no attributes.
+        // 6. Append the element root to the Document node created above.
+        // 7. Set up the parser's stack of open elements so that it contains just the single element root.
+        builder.create_root(vec![]);
+        // 10. Reset the parser's insertion mode appropriately.
+        builder.mode = builder.reset_insertion_mode();
+
+        builder
+    }
+
+    /// Pick the initial tokenizer state for fragment parsing from the context
+    /// element's name.
+    ///
+    /// https://html.spec.whatwg.org/multipage/#concept-frag-parse-context
+    /// Step 4. Set the state of the HTML parser's tokenization stage as follows:
+    ///
+    /// Panics with "no context element" if the builder has no context element.
+    pub fn tokenizer_state_for_context_elem(&self) -> tok_state::State {
+        let context = self.context_elem.as_ref().expect("no context element");
+        let expanded = self.sink.elem_name(context);
+
+        // Only HTML-namespace elements switch the tokenizer away from `Data`.
+        if expanded.ns != &ns!(html) {
+            return tok_state::Data;
+        }
+
+        let raw_kind = match *expanded.local {
+            local_name!("title") | local_name!("textarea") => tok_state::Rcdata,
+
+            local_name!("style") |
+            local_name!("xmp") |
+            local_name!("iframe") |
+            local_name!("noembed") |
+            local_name!("noframes") => tok_state::Rawtext,
+
+            local_name!("script") => tok_state::ScriptData,
+
+            // With scripting disabled `noscript` falls through to `Data` below.
+            local_name!("noscript") if self.opts.scripting_enabled => tok_state::Rawtext,
+
+            local_name!("plaintext") => return tok_state::Plaintext,
+
+            _ => return tok_state::Data,
+        };
+        tok_state::RawData(raw_kind)
+    }
+
+    /// Report every `Handle` held in the tree builder's internal state to
+    /// `tracer` via `trace_handle`. This is intended to support
+    /// garbage-collected DOMs.
+    ///
+    /// Handles are visited in this order: the document, the stack of open
+    /// elements, the element entries of the active formatting list, then the
+    /// head, form and context element pointers when they are set.
+    pub fn trace_handles(&self, tracer: &Tracer<Handle = Handle>) {
+        tracer.trace_handle(&self.doc_handle);
+
+        for open in self.open_elems.iter() {
+            tracer.trace_handle(open);
+        }
+
+        for entry in self.active_formatting.iter() {
+            // Markers carry no handle.
+            if let Element(ref handle, _) = *entry {
+                tracer.trace_handle(handle);
+            }
+        }
+
+        if let Some(head) = self.head_elem.as_ref() {
+            tracer.trace_handle(head);
+        }
+        if let Some(form) = self.form_elem.as_ref() {
+            tracer.trace_handle(form);
+        }
+        if let Some(context) = self.context_elem.as_ref() {
+            tracer.trace_handle(context);
+        }
+    }
+
+    /// Debugging aid: print the names on the stack of open elements and in the
+    /// list of active formatting elements to stdout, under the heading `label`.
+    ///
+    /// Panics if any listed element is not in the HTML namespace.
+    #[allow(dead_code)]
+    fn dump_state(&self, label: String) {
+        // Print one element's local name, insisting on the HTML namespace.
+        let print_html_name = |handle: &Handle| {
+            let name = self.sink.elem_name(handle);
+            match *name.ns {
+                ns!(html) => print!(" {}", name.local),
+                _ => panic!(),
+            }
+        };
+
+        println!("dump_state on {}", label);
+
+        print!(" open_elems:");
+        for node in &self.open_elems {
+            print_html_name(node);
+        }
+        println!("");
+
+        print!(" active_formatting:");
+        for entry in &self.active_formatting {
+            match *entry {
+                Marker => print!(" Marker"),
+                Element(ref handle, _) => print_html_name(handle),
+            }
+        }
+        println!("");
+    }
+
+    /// Log, at debug level, which token is about to be processed in which
+    /// insertion mode. The token is only escaped when debug logging is on.
+    fn debug_step(&self, mode: InsertionMode, token: &Token) {
+        if !log_enabled!(Level::Debug) {
+            return;
+        }
+        debug!(
+            "processing {} in insertion mode {:?}",
+            to_escaped_string(token),
+            mode
+        );
+    }
+
+ /// Run `token` through the tree-construction rules until it, and any tokens
+ /// split off from it, have been fully handled.
+ ///
+ /// Each step yields a `ProcessResult` that decides what happens next: fetch
+ /// the next queued token, reprocess a token (possibly in another insertion
+ /// mode), split leading whitespace off a character run, or hand control
+ /// back to the tokenizer with a `Script` / `Plaintext` / `RawData` request.
+ ///
+ /// NOTE(review): `unwrap_or_return!` is defined outside this view;
+ /// presumably it returns its second argument from this function when the
+ /// first is `None` -- confirm against the macro definition.
+ fn process_to_completion(&mut self, mut token: Token) -> TokenSinkResult<Handle> {
+ // Queue of additional tokens yet to be processed.
+ // This stays empty in the common case where we don't split whitespace.
+ let mut more_tokens = VecDeque::new();
+
+ loop {
+ // A self-closing start tag must be answered with `DoneAckSelfClosing`;
+ // a plain `Done` for such a tag is reported as a parse error below.
+ let should_have_acknowledged_self_closing_flag = matches!(
+ token,
+ TagToken(Tag {
+ self_closing: true,
+ kind: StartTag,
+ ..
+ })
+ );
+ // Tokens for foreign content bypass the insertion-mode rules.
+ let result = if self.is_foreign(&token) {
+ self.step_foreign(token)
+ } else {
+ let mode = self.mode;
+ self.step(mode, token)
+ };
+ match result {
+ Done => {
+ if should_have_acknowledged_self_closing_flag {
+ self.sink
+ .parse_error(Borrowed("Unacknowledged self-closing tag"));
+ }
+ // Move on to the next queued token, if any.
+ token = unwrap_or_return!(
+ more_tokens.pop_front(),
+ tokenizer::TokenSinkResult::Continue
+ );
+ },
+ DoneAckSelfClosing => {
+ token = unwrap_or_return!(
+ more_tokens.pop_front(),
+ tokenizer::TokenSinkResult::Continue
+ );
+ },
+ // Same token again, but under insertion mode `m`.
+ Reprocess(m, t) => {
+ self.mode = m;
+ token = t;
+ },
+ // Same token again; the insertion mode is left untouched.
+ ReprocessForeign(t) => {
+ token = t;
+ },
+ SplitWhitespace(mut buf) => {
+ // Take the leading run of whitespace or non-whitespace characters
+ // as the next token and queue whatever is left of the buffer.
+ let p = buf.pop_front_char_run(is_ascii_whitespace);
+ let (first, is_ws) = unwrap_or_return!(p, tokenizer::TokenSinkResult::Continue);
+ let status = if is_ws { Whitespace } else { NotWhitespace };
+ token = CharacterTokens(status, first);
+
+ if buf.len32() > 0 {
+ more_tokens.push_back(CharacterTokens(NotSplit, buf));
+ }
+ },
+ // The remaining results return to the tokenizer; no split-off
+ // tokens may be pending at that point.
+ Script(node) => {
+ assert!(more_tokens.is_empty());
+ return tokenizer::TokenSinkResult::Script(node);
+ },
+ ToPlaintext => {
+ assert!(more_tokens.is_empty());
+ return tokenizer::TokenSinkResult::Plaintext;
+ },
+ ToRawData(k) => {
+ assert!(more_tokens.is_empty());
+ return tokenizer::TokenSinkResult::RawData(k);
+ },
+ }
+ }
+ }
+
+    /// Are we parsing an HTML fragment? True exactly when a context element
+    /// is set.
+    pub fn is_fragment(&self) -> bool {
+        matches!(self.context_elem, Some(_))
+    }
+
+    /// Work out where a new node should be inserted.
+    ///
+    /// https://html.spec.whatwg.org/multipage/#appropriate-place-for-inserting-a-node
+    ///
+    /// The target is `override_target` when given, otherwise the current
+    /// node. Foster parenting applies only when the flag is set and the
+    /// target is a `table`, `tbody`, `tfoot`, `thead` or `tr`.
+    fn appropriate_place_for_insertion(
+        &mut self,
+        override_target: Option<Handle>,
+    ) -> InsertionPoint<Handle> {
+        use self::tag_sets::*;
+
+        declare_tag_set!(foster_target = "table" "tbody" "tfoot" "thead" "tr");
+        let target = match override_target {
+            Some(handle) => handle,
+            None => self.current_node().clone(),
+        };
+
+        let foster = self.foster_parenting && self.elem_in(&target, foster_target);
+        if !foster {
+            // No foster parenting: append to the target itself (the common
+            // case), or to its template contents when it is a `template`.
+            let parent = if self.html_elem_named(&target, local_name!("template")) {
+                self.sink.get_template_contents(&target)
+            } else {
+                target
+            };
+            return LastChild(parent);
+        }
+
+        // Foster parenting: walk the stack from the most recently opened
+        // element and stop at the first `template` or `table`.
+        let mut stack = self.open_elems.iter().rev().peekable();
+        while let Some(candidate) = stack.next() {
+            if self.html_elem_named(candidate, local_name!("template")) {
+                return LastChild(self.sink.get_template_contents(candidate));
+            }
+            if self.html_elem_named(candidate, local_name!("table")) {
+                return TableFosterParenting {
+                    element: candidate.clone(),
+                    // The element opened just before the table.
+                    prev_element: (*stack.peek().unwrap()).clone(),
+                };
+            }
+        }
+
+        // Neither found: fall back to the root `html` element.
+        LastChild(self.html_elem().clone())
+    }
+
+    /// Hand `child` to the sink at the position described by `insertion_point`.
+    fn insert_at(&mut self, insertion_point: InsertionPoint<Handle>, child: NodeOrText<Handle>) {
+        let sink = &mut self.sink;
+        match insertion_point {
+            LastChild(parent) => sink.append(&parent, child),
+            BeforeSibling(next) => sink.append_before_sibling(&next, child),
+            TableFosterParenting {
+                element: table,
+                prev_element: above_table,
+            } => sink.append_based_on_parent_node(&table, &above_table, child),
+        }
+    }
+}
+
+impl<Handle, Sink> TokenSink for TreeBuilder<Handle, Sink>
+where
+ Handle: Clone,
+ Sink: TreeSink<Handle = Handle>,
+{
+ type Handle = Handle;
+
+    /// Feed one tokenizer token to the tree builder.
+    ///
+    /// `line_number` is the line the token came from; the sink is told about
+    /// it whenever it differs from the line last reported.
+    ///
+    /// Parse errors are forwarded to the sink and DOCTYPE tokens are handled
+    /// here directly. Every other token is converted to the tree builder's
+    /// own `Token` type and run through `process_to_completion`.
+    fn process_token(
+        &mut self,
+        token: tokenizer::Token,
+        line_number: u64,
+    ) -> TokenSinkResult<Handle> {
+        if line_number != self.current_line {
+            // Record the new line so the sink is only notified on an actual
+            // change, not on every token once the line differs from the
+            // initial value.
+            self.current_line = line_number;
+            self.sink.set_current_line(line_number);
+        }
+        // The flag only ever applies to the token immediately following it.
+        let ignore_lf = replace(&mut self.ignore_lf, false);
+
+        // Handle `ParseError` and `DoctypeToken`; convert everything else to the local `Token` type.
+        let token = match token {
+            tokenizer::ParseError(e) => {
+                self.sink.parse_error(e);
+                return tokenizer::TokenSinkResult::Continue;
+            },
+
+            tokenizer::DoctypeToken(dt) => {
+                if self.mode == Initial {
+                    let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc);
+                    if err {
+                        self.sink.parse_error(format_if!(
+                            self.opts.exact_errors,
+                            "Bad DOCTYPE",
+                            "Bad DOCTYPE: {:?}",
+                            dt
+                        ));
+                    }
+                    let Doctype {
+                        name,
+                        public_id,
+                        system_id,
+                        force_quirks: _,
+                    } = dt;
+                    if !self.opts.drop_doctype {
+                        // Missing parts are passed to the sink as empty strings.
+                        self.sink.append_doctype_to_document(
+                            name.unwrap_or_else(StrTendril::new),
+                            public_id.unwrap_or_else(StrTendril::new),
+                            system_id.unwrap_or_else(StrTendril::new),
+                        );
+                    }
+                    self.set_quirks_mode(quirk);
+
+                    self.mode = BeforeHtml;
+                    return tokenizer::TokenSinkResult::Continue;
+                } else {
+                    // A DOCTYPE outside the initial mode is an error and is dropped.
+                    self.sink.parse_error(format_if!(
+                        self.opts.exact_errors,
+                        "DOCTYPE in body",
+                        "DOCTYPE in insertion mode {:?}",
+                        self.mode
+                    ));
+                    return tokenizer::TokenSinkResult::Continue;
+                }
+            },
+
+            tokenizer::TagToken(x) => TagToken(x),
+            tokenizer::CommentToken(x) => CommentToken(x),
+            tokenizer::NullCharacterToken => NullCharacterToken,
+            tokenizer::EOFToken => EOFToken,
+
+            tokenizer::CharacterTokens(mut x) => {
+                // Drop a single leading newline when the previous token asked for it.
+                if ignore_lf && x.starts_with("\n") {
+                    x.pop_front(1);
+                }
+                if x.is_empty() {
+                    return tokenizer::TokenSinkResult::Continue;
+                }
+                CharacterTokens(NotSplit, x)
+            },
+        };
+
+        self.process_to_completion(token)
+    }
+
+    /// Finish parsing: empty the stack of open elements, reporting each
+    /// element to the sink as popped, most recently opened first.
+    fn end(&mut self) {
+        while let Some(elem) = self.open_elems.pop() {
+            self.sink.pop(&elem);
+        }
+    }
+
+    /// True when there is an adjusted current node and its namespace is not
+    /// the HTML namespace. Always false while the stack of open elements is
+    /// empty.
+    fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool {
+        if self.open_elems.is_empty() {
+            return false;
+        }
+        let name = self.sink.elem_name(self.adjusted_current_node());
+        name.ns != &ns!(html)
+    }
+}
+
+/// Return the first (bottom-most) entry of a stack of open elements.
+///
+/// # Panics
+///
+/// Panics (index out of bounds) if `open_elems` is empty.
+pub fn html_elem<Handle>(open_elems: &[Handle]) -> &Handle {
+ &open_elems[0]
+}
+
+/// Iterator over the element entries of a list of active formatting elements,
+/// walking from the end of the list towards the start and stopping at the
+/// first marker. Yields `(index in the list, handle, tag)`.
+pub struct ActiveFormattingIter<'a, Handle: 'a> {
+ // Reversed, index-carrying walk over the underlying entries.
+ iter: Rev<Enumerate<slice::Iter<'a, FormatEntry<Handle>>>>,
+}
+
+impl<'a, Handle> Iterator for ActiveFormattingIter<'a, Handle> {
+    type Item = (usize, &'a Handle, &'a Tag);
+
+    /// Yield the next element entry, or `None` once the list is exhausted or
+    /// a marker is reached.
+    fn next(&mut self) -> Option<(usize, &'a Handle, &'a Tag)> {
+        match self.iter.next()? {
+            (_, &Marker) => None,
+            (index, &Element(ref handle, ref tag)) => Some((index, handle, tag)),
+        }
+    }
+}
+
+/// Flag passed to `insert_element`.
+// NOTE(review): presumably selects whether the new element is also pushed
+// onto the stack of open elements -- confirm against `insert_element`, which
+// is outside this view.
+pub enum PushFlag {
+ Push,
+ NoPush,
+}
+
+/// Position in the list of active formatting elements at which the adoption
+/// agency algorithm places its new formatting entry.
+enum Bookmark<Handle> {
+ /// Overwrite the entry that holds this handle.
+ Replace(Handle),
+ /// Insert right after the entry that holds this handle (the formatting
+ /// element's old entry is then removed).
+ InsertAfter(Handle),
+}
+
+// Build a `QualName` from literal tokens.
+//
+// * `qualname!("", local)` -- no prefix, empty namespace (`ns!()`).
+// * `qualname!(prefix ns local)` -- the given prefix, namespace and local name
+//   (note: the three tokens are NOT comma-separated in this form).
+macro_rules! qualname {
+ ("", $local:tt) => {
+ QualName {
+ prefix: None,
+ ns: ns!(),
+ local: local_name!($local),
+ }
+ };
+ ($prefix: tt $ns:tt $local:tt) => {
+ QualName {
+ prefix: Some(namespace_prefix!($prefix)),
+ ns: ns!($ns),
+ local: local_name!($local),
+ }
+ };
+}
+
+#[doc(hidden)]
+impl<Handle, Sink> TreeBuilder<Handle, Sink>
+where
+ Handle: Clone,
+ Sink: TreeSink<Handle = Handle>,
+{
+    /// Report `_thing` as an unexpected token to the sink and finish the
+    /// current step with `Done`.
+    fn unexpected<T: fmt::Debug>(&mut self, _thing: &T) -> ProcessResult<Handle> {
+        let message = format_if!(
+            self.opts.exact_errors,
+            "Unexpected token",
+            "Unexpected token {} in insertion mode {:?}",
+            to_escaped_string(_thing),
+            self.mode
+        );
+        self.sink.parse_error(message);
+        Done
+    }
+
+    /// Panic unless `node` is an HTML element with local name `name`.
+    fn assert_named(&mut self, node: &Handle, name: LocalName) {
+        let matches_name = self.html_elem_named(node, name);
+        assert!(matches_name);
+    }
+
+    /// Walk the active formatting elements backwards, yielding each entry with
+    /// its index in the list, until the last marker is reached (or the start
+    /// of the list when there is no marker).
+    fn active_formatting_end_to_marker<'a>(&'a self) -> ActiveFormattingIter<'a, Handle> {
+        let iter = self.active_formatting.iter().enumerate().rev();
+        ActiveFormattingIter { iter }
+    }
+
+    /// Index of the first active formatting entry whose handle is the same
+    /// node as `element`, if any. Markers never match.
+    fn position_in_active_formatting(&self, element: &Handle) -> Option<usize> {
+        self.active_formatting.iter().position(|entry| {
+            if let Element(ref handle, _) = *entry {
+                self.sink.same_node(handle, element)
+            } else {
+                false
+            }
+        })
+    }
+
+    /// Switch to quirks mode `mode`: the sink is informed and the builder's
+    /// own copy is updated.
+    fn set_quirks_mode(&mut self, mode: QuirksMode) {
+        self.sink.set_quirks_mode(mode);
+        self.quirks_mode = mode;
+    }
+
+ /// Stop parsing. This implementation performs no extra work; it simply
+ /// finishes the current step with `Done`.
+ fn stop_parsing(&mut self) -> ProcessResult<Handle> {
+ Done
+ }
+
+    //§ parsing-elements-that-contain-only-text
+    // Remember the current insertion mode as the original mode, enter `Text`
+    // mode, and ask the tokenizer (through the returned `ToRawData`) to switch
+    // to raw-data kind `k`.
+    // The tokenizer switch only takes effect after the current / next
+    // `process_token` of a start tag returns!
+    fn to_raw_text_mode(&mut self, k: RawKind) -> ProcessResult<Handle> {
+        let previous_mode = replace(&mut self.mode, Text);
+        self.orig_mode = Some(previous_mode);
+        ToRawData(k)
+    }
+
+    // The generic raw text / RCDATA parsing algorithm: insert an element for
+    // `tag`, then save the insertion mode, enter `Text` mode and request
+    // raw-data kind `k` from the tokenizer.
+    fn parse_raw_data(&mut self, tag: Tag, k: RawKind) -> ProcessResult<Handle> {
+        self.insert_element_for(tag);
+
+        // Same steps as `to_raw_text_mode`.
+        self.orig_mode = Some(self.mode);
+        self.mode = Text;
+        ToRawData(k)
+    }
+    //§ END
+
+    /// The most recently opened element.
+    ///
+    /// Panics with "no current element" when the stack of open elements is
+    /// empty.
+    fn current_node(&self) -> &Handle {
+        let top = self.open_elems.last();
+        top.expect("no current element")
+    }
+
+    /// The context element when one is set and exactly one element is open;
+    /// otherwise the current node.
+    fn adjusted_current_node(&self) -> &Handle {
+        match (self.open_elems.len(), self.context_elem.as_ref()) {
+            (1, Some(context)) => context,
+            _ => self.current_node(),
+        }
+    }
+
+    /// Does the current node's name belong to the tag set `set`?
+    fn current_node_in<TagSet>(&self, set: TagSet) -> bool
+    where
+        TagSet: Fn(ExpandedName) -> bool,
+    {
+        let current = self.current_node();
+        let name = self.sink.elem_name(current);
+        set(name)
+    }
+
+    // Insert `child` at the "appropriate place for inserting a node", computed
+    // for `override_target` (or for the current node when it is `None`).
+    fn insert_appropriately(&mut self, child: NodeOrText<Handle>, override_target: Option<Handle>) {
+        match self.appropriate_place_for_insertion(override_target) {
+            LastChild(parent) => self.sink.append(&parent, child),
+            BeforeSibling(sibling) => self.sink.append_before_sibling(&sibling, child),
+            TableFosterParenting {
+                element,
+                prev_element,
+            } => self
+                .sink
+                .append_based_on_parent_node(&element, &prev_element, child),
+        }
+    }
+
+ /// Run the adoption agency algorithm for an end tag named `subject`.
+ ///
+ /// The numbered comments follow the step numbers of the algorithm in the
+ /// HTML specification.
+ ///
+ /// NOTE(review): `unwrap_or_return!` / `unwrap_or_else!` are defined outside
+ /// this view; presumably they evaluate the fallback block (and, for
+ /// `unwrap_or_return!`, return from this function) when the option is
+ /// `None` -- confirm against the macro definitions.
+ fn adoption_agency(&mut self, subject: LocalName) {
+ // 1. If the current node is named `subject` and is not in the list of
+ // active formatting elements, just pop it and stop.
+ if self.current_node_named(subject.clone()) {
+ if self
+ .position_in_active_formatting(self.current_node())
+ .is_none()
+ {
+ self.pop();
+ return;
+ }
+ }
+
+ // 2. 3. 4. The outer loop runs at most eight times.
+ for _ in 0..8 {
+ // 5. Find the last entry named `subject` after the last marker; if
+ // there is none, handle the token like any other end tag in body.
+ let (fmt_elem_index, fmt_elem, fmt_elem_tag) = unwrap_or_return!(
+ // We clone the Handle and Tag so they don't cause an immutable borrow of self.
+ self.active_formatting_end_to_marker()
+ .filter(|&(_, _, tag)| tag.name == subject)
+ .next()
+ .map(|(i, h, t)| (i, h.clone(), t.clone())),
+ {
+ self.process_end_tag_in_body(Tag {
+ kind: EndTag,
+ name: subject,
+ self_closing: false,
+ attrs: vec![],
+ });
+ }
+ );
+
+ // The formatting element must still be on the stack of open elements;
+ // otherwise report the error and drop its entry from the list.
+ let fmt_elem_stack_index = unwrap_or_return!(
+ self.open_elems
+ .iter()
+ .rposition(|n| self.sink.same_node(n, &fmt_elem)),
+ {
+ self.sink
+ .parse_error(Borrowed("Formatting element not open"));
+ self.active_formatting.remove(fmt_elem_index);
+ }
+ );
+
+ // 7. The formatting element must be in scope.
+ if !self.in_scope(default_scope, |n| self.sink.same_node(&n, &fmt_elem)) {
+ self.sink
+ .parse_error(Borrowed("Formatting element not in scope"));
+ return;
+ }
+
+ // 8. Not being the current node is an error, but processing continues.
+ if !self.sink.same_node(self.current_node(), &fmt_elem) {
+ self.sink
+ .parse_error(Borrowed("Formatting element not current node"));
+ }
+
+ // 9. The furthest block is the first element at or after the formatting
+ // element on the stack that is in the `special_tag` set.
+ let (furthest_block_index, furthest_block) = unwrap_or_return!(
+ self.open_elems
+ .iter()
+ .enumerate()
+ .skip(fmt_elem_stack_index)
+ .filter(|&(_, open_element)| self.elem_in(open_element, special_tag))
+ .next()
+ .map(|(i, h)| (i, h.clone())),
+ // 10. No furthest block: cut the stack back to just before the
+ // formatting element and remove its entry from the list.
+ {
+ self.open_elems.truncate(fmt_elem_stack_index);
+ self.active_formatting.remove(fmt_elem_index);
+ }
+ );
+
+ // 11. The common ancestor is the element just before the formatting
+ // element on the stack.
+ let common_ancestor = self.open_elems[fmt_elem_stack_index - 1].clone();
+
+ // 12. The bookmark starts at the formatting element's own entry.
+ let mut bookmark = Bookmark::Replace(fmt_elem.clone());
+
+ // 13. Walk up the stack from the furthest block to the formatting element.
+ let mut node;
+ let mut node_index = furthest_block_index;
+ let mut last_node = furthest_block.clone();
+
+ // 13.1.
+ let mut inner_counter = 0;
+ loop {
+ // 13.2.
+ inner_counter += 1;
+
+ // 13.3. Move to the element just before `node` on the stack.
+ node_index -= 1;
+ node = self.open_elems[node_index].clone();
+
+ // 13.4. Reached the formatting element: the inner loop is done.
+ if self.sink.same_node(&node, &fmt_elem) {
+ break;
+ }
+
+ // 13.5. After three iterations, drop `node` from the list (if it is
+ // there) and from the stack.
+ if inner_counter > 3 {
+ self.position_in_active_formatting(&node)
+ .map(|position| self.active_formatting.remove(position));
+ self.open_elems.remove(node_index);
+ continue;
+ }
+
+ let node_formatting_index = unwrap_or_else!(
+ self.position_in_active_formatting(&node),
+ // 13.6. Not a formatting element: remove it from the stack.
+ {
+ self.open_elems.remove(node_index);
+ continue;
+ }
+ );
+
+ // 13.7. Replace `node` with a fresh element built from the same tag,
+ // both in the list and on the stack.
+ let tag = match self.active_formatting[node_formatting_index] {
+ Element(ref h, ref t) => {
+ assert!(self.sink.same_node(h, &node));
+ t.clone()
+ },
+ Marker => panic!("Found marker during adoption agency"),
+ };
+ // FIXME: Is there a way to avoid cloning the attributes twice here (once on their
+ // own, once as part of t.clone() above)?
+ let new_element = create_element(
+ &mut self.sink,
+ QualName::new(None, ns!(html), tag.name.clone()),
+ tag.attrs.clone(),
+ );
+ self.open_elems[node_index] = new_element.clone();
+ self.active_formatting[node_formatting_index] = Element(new_element.clone(), tag);
+ node = new_element;
+
+ // 13.8. While `last_node` is still the furthest block, the bookmark
+ // moves to just after the new element's entry.
+ if self.sink.same_node(&last_node, &furthest_block) {
+ bookmark = Bookmark::InsertAfter(node.clone());
+ }
+
+ // 13.9. Re-parent `last_node` under `node`.
+ self.sink.remove_from_parent(&last_node);
+ self.sink.append(&node, AppendNode(last_node.clone()));
+
+ // 13.10.
+ last_node = node.clone();
+
+ // 13.11.
+ }
+
+ // 14. Insert `last_node` at the appropriate place relative to the
+ // common ancestor.
+ self.sink.remove_from_parent(&last_node);
+ self.insert_appropriately(AppendNode(last_node.clone()), Some(common_ancestor));
+
+ // 15. Create a new element from the formatting element's tag.
+ // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own,
+ // once as part of t.clone() above)?
+ let new_element = create_element(
+ &mut self.sink,
+ QualName::new(None, ns!(html), fmt_elem_tag.name.clone()),
+ fmt_elem_tag.attrs.clone(),
+ );
+ let new_entry = Element(new_element.clone(), fmt_elem_tag);
+
+ // 16. Move the furthest block's children into the new element.
+ self.sink.reparent_children(&furthest_block, &new_element);
+
+ // 17. Append the new element to the furthest block.
+ self.sink
+ .append(&furthest_block, AppendNode(new_element.clone()));
+
+ // 18. Put the new entry at the bookmark and retire the formatting
+ // element's old entry.
+ // FIXME: We could probably get rid of the position_in_active_formatting() calls here
+ // if we had a more clever Bookmark representation.
+ match bookmark {
+ Bookmark::Replace(to_replace) => {
+ let index = self
+ .position_in_active_formatting(&to_replace)
+ .expect("bookmark not found in active formatting elements");
+ self.active_formatting[index] = new_entry;
+ },
+ Bookmark::InsertAfter(previous) => {
+ let index = self
+ .position_in_active_formatting(&previous)
+ .expect("bookmark not found in active formatting elements") +
+ 1;
+ self.active_formatting.insert(index, new_entry);
+ let old_index = self
+ .position_in_active_formatting(&fmt_elem)
+ .expect("formatting element not found in active formatting elements");
+ self.active_formatting.remove(old_index);
+ },
+ }
+
+ // 19. Remove the formatting element from the stack and put the new
+ // element right after the furthest block.
+ self.remove_from_stack(&fmt_elem);
+ let new_furthest_block_index = self
+ .open_elems
+ .iter()
+ .position(|n| self.sink.same_node(n, &furthest_block))
+ .expect("furthest block missing from open element stack");
+ self.open_elems
+ .insert(new_furthest_block_index + 1, new_element);
+
+ // 20. Back to the top of the outer loop.
+ }
+ }
+
+    /// Push a clone of `elem` onto the stack of open elements.
+    fn push(&mut self, elem: &Handle) {
+        let handle = elem.clone();
+        self.open_elems.push(handle);
+    }
+
+    /// Pop the current node off the stack of open elements, notify the sink,
+    /// and return the popped handle.
+    ///
+    /// Panics with "no current element" when the stack is empty.
+    fn pop(&mut self) -> Handle {
+        let top = self.open_elems.pop();
+        let popped = top.expect("no current element");
+        self.sink.pop(&popped);
+        popped
+    }
+
+    /// Remove the last occurrence of `elem` from the stack of open elements
+    /// and report it to the sink as popped. Does nothing when `elem` is not on
+    /// the stack.
+    fn remove_from_stack(&mut self, elem: &Handle) {
+        let found = self
+            .open_elems
+            .iter()
+            .rposition(|open| self.sink.same_node(elem, open));
+        let index = match found {
+            Some(index) => index,
+            None => return,
+        };
+        self.open_elems.remove(index);
+        self.sink.pop(elem);
+    }
+
+    /// Is `entry` a marker, or an element that is currently on the stack of
+    /// open elements?
+    fn is_marker_or_open(&self, entry: &FormatEntry<Handle>) -> bool {
+        let node = match *entry {
+            Marker => return true,
+            Element(ref node, _) => node,
+        };
+        // Search from the most recently opened element.
+        self.open_elems
+            .iter()
+            .rev()
+            .any(|open| self.sink.same_node(open, node))
+    }
+
+    /// Reconstruct the active formatting elements.
+    ///
+    /// Nothing happens when the list is empty or its last entry is a marker
+    /// or an open element. Otherwise every trailing entry that is neither a
+    /// marker nor open is re-created, in list order, as a new element that is
+    /// inserted and pushed, and the entry is updated to the new element.
+    fn reconstruct_formatting(&mut self) {
+        match self.active_formatting.last() {
+            None => return,
+            Some(last) if self.is_marker_or_open(last) => return,
+            Some(_) => {},
+        }
+
+        // Rewind to the first entry of the trailing run that needs rebuilding.
+        let mut entry_index = self.active_formatting.len() - 1;
+        while entry_index > 0 &&
+            !self.is_marker_or_open(&self.active_formatting[entry_index - 1])
+        {
+            entry_index -= 1;
+        }
+
+        // Advance through the run, re-creating each element.
+        loop {
+            let tag = if let Element(_, ref t) = self.active_formatting[entry_index] {
+                t.clone()
+            } else {
+                panic!("Found marker during formatting element reconstruction")
+            };
+
+            // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own,
+            // once as part of t.clone() above)?
+            let rebuilt =
+                self.insert_element(Push, ns!(html), tag.name.clone(), tag.attrs.clone());
+            self.active_formatting[entry_index] = Element(rebuilt, tag);
+
+            if entry_index + 1 == self.active_formatting.len() {
+                break;
+            }
+            entry_index += 1;
+        }
+    }
+
+ /// Get the first element on the stack, which will be the <html> element.
+ ///
+ /// Indexes `open_elems[0]` directly, so this panics when the stack is empty.
+ fn html_elem(&self) -> &Handle {
+ &self.open_elems[0]
+ }
+
+ /// Return the second entry on the stack of open elements when it is an
+ /// HTML `body` element.
+ ///
+ /// Yields `None` when the stack holds fewer than two entries or when the
+ /// second entry is any other element.
+ fn body_elem(&self) -> Option<&Handle> {
+     self.open_elems
+         .get(1)
+         .filter(|node| self.html_elem_named(node, local_name!("body")))
+ }
+
+ /// Signal an error depending on the state of the stack of open elements at
+ /// the end of the body.
+ fn check_body_end(&mut self) {
+ declare_tag_set!(body_end_ok =
+ "dd" "dt" "li" "optgroup" "option" "p" "rp" "rt" "tbody" "td" "tfoot" "th"
+ "thead" "tr" "body" "html");
+
+ for elem in self.open_elems.iter() {
+ let error;
+ {
+ let name = self.sink.elem_name(elem);
+ if body_end_ok(name) {
+ continue;
+ }
+ error = format_if!(
+ self.opts.exact_errors,
+ "Unexpected open tag at end of body",
+ "Unexpected open tag {:?} at end of body",
+ name
+ );
+ }
+ self.sink.parse_error(error);
+ // FIXME: Do we keep checking after finding one bad tag?
+ // The spec suggests not.
+ return;
+ }
+ }
+
+ /// Walk the stack of open elements from the top down and decide whether an
+ /// element accepted by `pred` is found before one whose name is in `scope`.
+ ///
+ /// Returns `true` as soon as `pred` accepts a node, `false` as soon as a
+ /// node's name is in `scope`, and `false` if the stack is exhausted.
+ fn in_scope<TagSet, Pred>(&self, scope: TagSet, pred: Pred) -> bool
+ where
+     TagSet: Fn(ExpandedName) -> bool,
+     Pred: Fn(Handle) -> bool,
+ {
+     let verdict = self.open_elems.iter().rev().find_map(|node| {
+         if pred(node.clone()) {
+             Some(true)
+         } else if scope(self.sink.elem_name(node)) {
+             Some(false)
+         } else {
+             None
+         }
+     });
+
+     // supposed to be impossible, because <html> is always in scope
+     verdict.unwrap_or(false)
+ }
+
+ /// Test whether the expanded name the sink reports for `elem` is accepted
+ /// by the tag set `set`.
+ fn elem_in<TagSet>(&self, elem: &Handle, set: TagSet) -> bool
+ where
+     TagSet: Fn(ExpandedName) -> bool,
+ {
+     let expanded = self.sink.elem_name(elem);
+     set(expanded)
+ }
+
+ /// Test whether `elem` is in the HTML namespace and has local name `name`.
+ fn html_elem_named(&self, elem: &Handle, name: LocalName) -> bool {
+     let ExpandedName { ns, local } = self.sink.elem_name(elem);
+     *ns == ns!(html) && *local == name
+ }
+
+ /// Test whether any entry on the stack of open elements is an HTML element
+ /// with local name `name`.
+ fn in_html_elem_named(&self, name: LocalName) -> bool {
+     for elem in self.open_elems.iter() {
+         if self.html_elem_named(elem, name.clone()) {
+             return true;
+         }
+     }
+     false
+ }
+
+ /// Test whether the current node is an HTML element with local name `name`.
+ fn current_node_named(&self, name: LocalName) -> bool {
+     let current = self.current_node();
+     self.html_elem_named(current, name)
+ }
+
+ /// `in_scope` specialised to look for an HTML element with local name
+ /// `name` inside the given `scope`.
+ fn in_scope_named<TagSet>(&self, scope: TagSet, name: LocalName) -> bool
+ where
+     TagSet: Fn(ExpandedName) -> bool,
+ {
+     let has_name = |elem: Handle| self.html_elem_named(&elem, name.clone());
+     self.in_scope(scope, has_name)
+ }
+
+ //§ closing-elements-that-have-implied-end-tags
+ /// Pop elements (through `pop`, so the sink is notified) for as long as the
+ /// name of the current node is in `set`.
+ ///
+ /// Stops at the first current node whose name is not in `set`, or when the
+ /// stack of open elements becomes empty.
+ fn generate_implied_end<TagSet>(&mut self, set: TagSet)
+ where
+     TagSet: Fn(ExpandedName) -> bool,
+ {
+     while let Some(top) = self.open_elems.last() {
+         if !set(self.sink.elem_name(top)) {
+             return;
+         }
+         self.pop();
+     }
+ }
+
+ /// Run `generate_implied_end` with the `cursory_implied_end` tag set, minus
+ /// the HTML element whose local name is `except`.
+ fn generate_implied_end_except(&mut self, except: LocalName) {
+     self.generate_implied_end(|p| {
+         let is_excepted = *p.ns == ns!(html) && *p.local == except;
+         !is_excepted && cursory_implied_end(p)
+     });
+ }
+ //§ END
+
+ // Drop entries from the top of the stack of open elements until the
+ // current node's name is accepted by `pred`.
+ //
+ // Entries are removed straight from `open_elems`; unlike `pop`, the sink's
+ // `pop` callback is not invoked here.
+ fn pop_until_current<TagSet>(&mut self, pred: TagSet)
+ where
+     TagSet: Fn(ExpandedName) -> bool,
+ {
+     while !self.current_node_in(|name| pred(name)) {
+         self.open_elems.pop();
+     }
+ }
+
+ // Remove entries from the top of the stack of open elements until one whose
+ // name is accepted by `pred` has been removed, or the stack runs out.
+ //
+ // Returns the number of removal attempts made; the attempt that finds the
+ // stack empty is included in that count. Entries are taken directly from
+ // `open_elems`, so the sink's `pop` callback is not invoked.
+ fn pop_until<P>(&mut self, pred: P) -> usize
+ where
+     P: Fn(ExpandedName) -> bool,
+ {
+     let mut attempts = 0;
+     loop {
+         attempts += 1;
+         let finished = match self.open_elems.pop() {
+             Some(elem) => pred(self.sink.elem_name(&elem)),
+             None => true,
+         };
+         if finished {
+             return attempts;
+         }
+     }
+ }
+
+ /// `pop_until` specialised to stop once an HTML element with local name
+ /// `name` has been removed. Returns the count reported by `pop_until`.
+ fn pop_until_named(&mut self, name: LocalName) -> usize {
+     self.pop_until(|candidate| {
+         *candidate.ns == ns!(html) && *candidate.local == name
+     })
+ }
+
+ // Remove entries until an HTML element called `name` has been removed, and
+ // report a parse error unless exactly one entry had to be removed.
+ fn expect_to_close(&mut self, name: LocalName) {
+     let removed = self.pop_until_named(name.clone());
+     if removed == 1 {
+         return;
+     }
+     self.sink.parse_error(format_if!(
+         self.opts.exact_errors,
+         "Unexpected open element",
+         "Unexpected open element while closing {:?}",
+         name
+     ));
+ }
+
+ /// Close a `p` element: generate implied end tags for every tag in
+ /// `cursory_implied_end` except `p`, then remove entries up to and including
+ /// the `p` via `expect_to_close`, which reports a parse error if `p` was not
+ /// the current node.
+ fn close_p_element(&mut self) {
+ declare_tag_set!(implied = [cursory_implied_end] - "p");
+ self.generate_implied_end(implied);
+ self.expect_to_close(local_name!("p"));
+ }
+
+ /// Close a `p` element, but only when one is found by `in_scope_named`
+ /// using the `button_scope` tag set.
+ fn close_p_element_in_button_scope(&mut self) {
+     if !self.in_scope_named(button_scope, local_name!("p")) {
+         return;
+     }
+     self.close_p_element();
+ }
+
+ // Report whether the first `type` attribute (no namespace) of `tag` has
+ // the value "hidden", compared ASCII-case-insensitively. Returns `false`
+ // when the tag has no such attribute.
+ fn is_type_hidden(&self, tag: &Tag) -> bool {
+     tag.attrs
+         .iter()
+         .find(|&at| at.name.expanded() == expanded_name!("", "type"))
+         .map_or(false, |at| (&*at.value).eq_ignore_ascii_case("hidden"))
+ }
+
+ /// Process `token` with the `InBody` rules while the `foster_parenting`
+ /// flag is set; the flag is cleared again before the result is returned.
+ fn foster_parent_in_body(&mut self, token: Token) -> ProcessResult<Handle> {
+     warn!("foster parenting not implemented");
+     self.foster_parenting = true;
+     let outcome = self.step(InBody, token);
+     // FIXME: what if the outcome is Reprocess?
+     self.foster_parenting = false;
+     outcome
+ }
+
+ /// Handle a character token seen while in a table-related mode.
+ ///
+ /// When the current node is one of `table`, `tbody`, `tfoot`, `thead` or
+ /// `tr`, the current mode is saved in `orig_mode` and the token is
+ /// reprocessed in `InTableText`. Otherwise a parse error is reported and
+ /// the token is handed to `foster_parent_in_body`.
+ fn process_chars_in_table(&mut self, token: Token) -> ProcessResult<Handle> {
+     declare_tag_set!(table_outer = "table" "tbody" "tfoot" "thead" "tr");
+     if !self.current_node_in(table_outer) {
+         self.sink.parse_error(format_if!(
+             self.opts.exact_errors,
+             "Unexpected characters in table",
+             "Unexpected characters {} in table",
+             to_escaped_string(&token)
+         ));
+         return self.foster_parent_in_body(token);
+     }
+
+     assert!(self.pending_table_text.is_empty());
+     self.orig_mode = Some(self.mode);
+     Reprocess(InTableText, token)
+ }
+
+ // https://html.spec.whatwg.org/multipage/#reset-the-insertion-mode-appropriately
+ //
+ // Compute (but do not assign) the insertion mode implied by the stack of
+ // open elements. The stack is walked from the top down; the first HTML
+ // element with a recognised name decides the result. `InBody` is returned
+ // when nothing on the stack decides.
+ fn reset_insertion_mode(&mut self) -> InsertionMode {
+ for (i, mut node) in self.open_elems.iter().enumerate().rev() {
+ // `last` is true for the bottom-most stack entry. For that entry the
+ // context element, when there is one, is inspected instead.
+ let last = i == 0usize;
+ if let (true, Some(ctx)) = (last, self.context_elem.as_ref()) {
+ node = ctx;
+ }
+ // Only elements in the HTML namespace are considered.
+ let name = match self.sink.elem_name(node) {
+ ExpandedName {
+ ns: &ns!(html),
+ local,
+ } => local,
+ _ => continue,
+ };
+ match *name {
+ local_name!("select") => {
+ // Look at the entries below this one, nearest first: a `template`
+ // gives InSelect, a `table` gives InSelectInTable.
+ for ancestor in self.open_elems[0..i].iter().rev() {
+ if self.html_elem_named(ancestor, local_name!("template")) {
+ return InSelect;
+ } else if self.html_elem_named(ancestor, local_name!("table")) {
+ return InSelectInTable;
+ }
+ }
+ return InSelect;
+ },
+ // `td`/`th` decide the mode only when they are not the bottom entry.
+ local_name!("td") | local_name!("th") => {
+ if !last {
+ return InCell;
+ }
+ },
+ local_name!("tr") => return InRow,
+ local_name!("tbody") | local_name!("thead") | local_name!("tfoot") => {
+ return InTableBody;
+ },
+ local_name!("caption") => return InCaption,
+ local_name!("colgroup") => return InColumnGroup,
+ local_name!("table") => return InTable,
+ // Panics (unwrap) if `template_modes` is empty at this point.
+ local_name!("template") => return *self.template_modes.last().unwrap(),
+ // `head` decides the mode only when it is not the bottom entry.
+ local_name!("head") => {
+ if !last {
+ return InHead;
+ }
+ },
+ local_name!("body") => return InBody,
+ local_name!("frameset") => return InFrameset,
+ // For `html` the answer depends on whether a head element has been
+ // recorded yet.
+ local_name!("html") => match self.head_elem {
+ None => return BeforeHead,
+ Some(_) => return AfterHead,
+ },
+
+ _ => (),
+ }
+ }
+ InBody
+ }
+
+ /// Close the current table cell: generate implied end tags, remove entries
+ /// up to and including the nearest element in the `td_th` set (reporting a
+ /// parse error unless exactly one entry was removed), then clear the active
+ /// formatting list back to the last marker.
+ fn close_the_cell(&mut self) {
+     self.generate_implied_end(cursory_implied_end);
+     let removed = self.pop_until(td_th);
+     if removed != 1 {
+         self.sink
+             .parse_error(Borrowed("expected to close <td> or <th> with cell"));
+     }
+     self.clear_active_formatting_to_marker();
+ }
+
+ /// Insert `text` through `insert_appropriately` (no override target) and
+ /// return `Done`.
+ fn append_text(&mut self, text: StrTendril) -> ProcessResult<Handle> {
+     let child = AppendText(text);
+     self.insert_appropriately(child, None);
+     Done
+ }
+
+ /// Create a comment node holding `text`, insert it through
+ /// `insert_appropriately` (no override target), and return `Done`.
+ fn append_comment(&mut self, text: StrTendril) -> ProcessResult<Handle> {
+     let child = AppendNode(self.sink.create_comment(text));
+     self.insert_appropriately(child, None);
+     Done
+ }
+
+ /// Create a comment node holding `text`, append it to the document handle,
+ /// and return `Done`.
+ fn append_comment_to_doc(&mut self, text: StrTendril) -> ProcessResult<Handle> {
+     let child = AppendNode(self.sink.create_comment(text));
+     self.sink.append(&self.doc_handle, child);
+     Done
+ }
+
+ /// Create a comment node holding `text`, append it to the element returned
+ /// by `html_elem(&self.open_elems)`, and return `Done`.
+ fn append_comment_to_html(&mut self, text: StrTendril) -> ProcessResult<Handle> {
+     let child = AppendNode(self.sink.create_comment(text));
+     let root = html_elem(&self.open_elems);
+     self.sink.append(root, child);
+     Done
+ }
+
+ //§ creating-and-inserting-nodes
+ /// Create an HTML `html` element carrying `attrs`, push it onto the stack
+ /// of open elements, and append it to the document handle.
+ fn create_root(&mut self, attrs: Vec<Attribute>) {
+     let qname = QualName::new(None, ns!(html), local_name!("html"));
+     let root = create_element(&mut self.sink, qname, attrs);
+     self.push(&root);
+     self.sink.append(&self.doc_handle, AppendNode(root));
+     // FIXME: application cache selection algorithm
+ }
+
+ // https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token
+ //
+ // Create an element named `name` in namespace `ns` with `attrs`, insert it
+ // at the place chosen by `appropriate_place_for_insertion`, and return its
+ // handle. With `Push` the element is also pushed onto the stack of open
+ // elements; with `NoPush` it is not.
+ //
+ // The element is additionally associated with the current form element
+ // (through the sink's `associate_with_form`) when all of the following hold:
+ // its name is in `form_associatable`, `form_elem` is set, no `template` is
+ // on the stack of open elements, and it is not a `listed` element that
+ // carries a `form` attribute.
+ fn insert_element(
+     &mut self,
+     push: PushFlag,
+     ns: Namespace,
+     name: LocalName,
+     attrs: Vec<Attribute>,
+ ) -> Handle {
+     declare_tag_set!(form_associatable =
+         "button" "fieldset" "input" "object"
+         "output" "select" "textarea" "img");
+
+     declare_tag_set!(listed = [form_associatable] - "img");
+
+     // Step 7.
+     let qname = QualName::new(None, ns, name);
+     let elem = create_element(&mut self.sink, qname.clone(), attrs.clone());
+
+     // The insertion point is computed before the association so that the
+     // nodes it refers to can be handed to `associate_with_form`.
+     let insertion_point = self.appropriate_place_for_insertion(None);
+     let (node1, node2) = match insertion_point {
+         LastChild(ref p) | BeforeSibling(ref p) => (p.clone(), None),
+         TableFosterParenting {
+             ref element,
+             ref prev_element,
+         } => (element.clone(), Some(prev_element.clone())),
+     };
+
+     // Step 12.
+     if form_associatable(qname.expanded()) &&
+         self.form_elem.is_some() &&
+         !self.in_html_elem_named(local_name!("template")) &&
+         !(listed(qname.expanded()) &&
+             attrs
+                 .iter()
+                 .any(|a| a.name.expanded() == expanded_name!("", "form")))
+     {
+         // `form_elem` was checked with `is_some()` just above.
+         let form = self.form_elem.as_ref().unwrap().clone();
+         self.sink
+             .associate_with_form(&elem, &form, (&node1, node2.as_ref()));
+     }
+
+     self.insert_at(insertion_point, AppendNode(elem.clone()));
+
+     match push {
+         Push => self.push(&elem),
+         NoPush => (),
+     }
+     // FIXME: Remove from the stack if we can't append?
+     elem
+ }
+
+ /// Insert an HTML-namespace element built from `tag`'s name and attributes
+ /// and push it onto the stack of open elements.
+ fn insert_element_for(&mut self, tag: Tag) -> Handle {
+     let Tag { name, attrs, .. } = tag;
+     self.insert_element(Push, ns!(html), name, attrs)
+ }
+
+ /// Insert an HTML-namespace element built from `tag`'s name and attributes
+ /// without pushing it onto the stack of open elements (`NoPush`).
+ fn insert_and_pop_element_for(&mut self, tag: Tag) -> Handle {
+     let Tag { name, attrs, .. } = tag;
+     self.insert_element(NoPush, ns!(html), name, attrs)
+ }
+
+ /// Insert an attribute-less HTML-namespace element called `name` and push
+ /// it onto the stack of open elements.
+ fn insert_phantom(&mut self, name: LocalName) -> Handle {
+     let no_attrs = vec![];
+     self.insert_element(Push, ns!(html), name, no_attrs)
+ }
+ //§ END
+
+ /// Insert an element for the formatting `tag`, push it onto the stack of
+ /// open elements, and record it at the end of the active formatting list.
+ ///
+ /// Before inserting, the entries yielded by
+ /// `active_formatting_end_to_marker` are compared with `tag` using
+ /// `equiv_modulo_attr_order`. When three or more are equivalent, the
+ /// equivalent entry that was yielded last is removed from the list.
+ fn create_formatting_element_for(&mut self, tag: Tag) -> Handle {
+     // FIXME: This really wants unit tests.
+     let mut last_seen = None;
+     let mut equivalent = 0usize;
+     for (index, _, candidate) in self.active_formatting_end_to_marker() {
+         if !tag.equiv_modulo_attr_order(candidate) {
+             continue;
+         }
+         last_seen = Some(index);
+         equivalent += 1;
+     }
+
+     if equivalent >= 3 {
+         let index = last_seen.expect("matches with no index");
+         self.active_formatting.remove(index);
+     }
+
+     let (name, attrs) = (tag.name.clone(), tag.attrs.clone());
+     let elem = self.insert_element(Push, ns!(html), name, attrs);
+     self.active_formatting.push(Element(elem.clone(), tag));
+     elem
+ }
+
+ /// Pop entries off the end of the active formatting list until a `Marker`
+ /// has been popped or the list is empty.
+ fn clear_active_formatting_to_marker(&mut self) {
+     while let Some(entry) = self.active_formatting.pop() {
+         if let Marker = entry {
+             break;
+         }
+     }
+ }
+
+ /// Handle a generic end tag: close the nearest open HTML element whose
+ /// local name equals `tag.name`.
+ ///
+ /// If an element in `special_tag` is met on the stack before a matching
+ /// element, a parse error is reported and nothing is closed.
+ fn process_end_tag_in_body(&mut self, tag: Tag) {
+ // Look back for a matching open element.
+ let mut match_idx = None;
+ for (i, elem) in self.open_elems.iter().enumerate().rev() {
+ if self.html_elem_named(elem, tag.name.clone()) {
+ match_idx = Some(i);
+ break;
+ }
+
+ // A special element above any match blocks the end tag entirely.
+ if self.elem_in(elem, special_tag) {
+ self.sink
+ .parse_error(Borrowed("Found special tag while closing generic tag"));
+ return;
+ }
+ }
+
+ // Can't use unwrap_or_return!() due to rust-lang/rust#16617.
+ let match_idx = match match_idx {
+ None => {
+ // I believe this is impossible, because the root
+ // <html> element is in special_tag.
+ self.unexpected(&tag);
+ return;
+ },
+ Some(x) => x,
+ };
+
+ self.generate_implied_end_except(tag.name.clone());
+
+ // After implied end tags, the match should be the current node;
+ // anything still above it means the tags were mis-nested.
+ if match_idx != self.open_elems.len() - 1 {
+ // mis-nested tags
+ self.unexpected(&tag);
+ }
+ // Drops the matched element and everything above it. This truncates
+ // `open_elems` directly, so the sink's `pop` callback is not invoked.
+ self.open_elems.truncate(match_idx);
+ }
+
+ /// Deal with a new `a` start tag (`tag`) arriving while an `a` element is
+ /// still among the entries yielded by `active_formatting_end_to_marker`.
+ ///
+ /// When no such `a` entry exists this does nothing. Otherwise the tag is
+ /// reported as unexpected, the adoption agency algorithm is run for `a`,
+ /// and the old `a` node is removed from both the active formatting list and
+ /// the stack of open elements if it is still present in them.
+ fn handle_misnested_a_tags(&mut self, tag: &Tag) {
+     let node = unwrap_or_return!(
+         self.active_formatting_end_to_marker()
+             .find(|&(_, n, _)| self.html_elem_named(n, local_name!("a")))
+             .map(|(_, n, _)| n.clone()),
+         ()
+     );
+
+     self.unexpected(tag);
+     self.adoption_agency(local_name!("a"));
+     // The adoption agency run may already have dropped the entry, so the
+     // position is looked up again rather than reused.
+     if let Some(index) = self.position_in_active_formatting(&node) {
+         self.active_formatting.remove(index);
+     }
+     self.remove_from_stack(&node);
+ }
+
+ //§ tree-construction
+ /// Decide whether `token` counts as foreign, judged by the adjusted current
+ /// node.
+ ///
+ /// Returns `false` for an EOF token, for an empty stack of open elements,
+ /// when the adjusted current node is in the HTML namespace, and for the
+ /// integration-point cases handled below. Returns `true` otherwise.
+ fn is_foreign(&mut self, token: &Token) -> bool {
+ if let EOFToken = *token {
+ return false;
+ }
+
+ if self.open_elems.is_empty() {
+ return false;
+ }
+
+ let name = self.sink.elem_name(self.adjusted_current_node());
+ if let ns!(html) = *name.ns {
+ return false;
+ }
+
+ // At a MathML text integration point, characters and every start tag
+ // other than `mglyph` / `malignmark` are not foreign.
+ if mathml_text_integration_point(name) {
+ match *token {
+ CharacterTokens(..) | NullCharacterToken => return false,
+ TagToken(Tag {
+ kind: StartTag,
+ ref name,
+ ..
+ }) if !matches!(*name, local_name!("mglyph") | local_name!("malignmark")) => {
+ return false;
+ },
+ _ => (),
+ }
+ }
+
+ // At an SVG HTML integration point, characters and all start tags are
+ // not foreign.
+ if svg_html_integration_point(name) {
+ match *token {
+ CharacterTokens(..) | NullCharacterToken => return false,
+ TagToken(Tag { kind: StartTag, .. }) => return false,
+ _ => (),
+ }
+ }
+
+ // MathML `annotation-xml`: an `svg` start tag is never foreign here;
+ // characters and other start tags are foreign unless the sink reports
+ // the node as an integration point.
+ if let expanded_name!(mathml "annotation-xml") = name {
+ match *token {
+ TagToken(Tag {
+ kind: StartTag,
+ name: local_name!("svg"),
+ ..
+ }) => return false,
+ CharacterTokens(..) | NullCharacterToken | TagToken(Tag { kind: StartTag, .. }) => {
+ return !self
+ .sink
+ .is_mathml_annotation_xml_integration_point(self.adjusted_current_node());
+ },
+ _ => {},
+ };
+ }
+
+ true
+ }
+ //§ END
+
+ /// Insert an element for `tag` in the foreign namespace `ns`.
+ ///
+ /// MathML or SVG attribute adjustments are applied according to `ns`,
+ /// followed by the foreign attribute adjustments. A self-closing tag is
+ /// inserted without being pushed and yields `DoneAckSelfClosing`; any other
+ /// tag is pushed onto the stack of open elements and yields `Done`.
+ fn enter_foreign(&mut self, mut tag: Tag, ns: Namespace) -> ProcessResult<Handle> {
+     match ns {
+         ns!(mathml) => self.adjust_mathml_attributes(&mut tag),
+         ns!(svg) => self.adjust_svg_attributes(&mut tag),
+         _ => (),
+     }
+     self.adjust_foreign_attributes(&mut tag);
+
+     let (push, outcome) = if tag.self_closing {
+         (NoPush, DoneAckSelfClosing)
+     } else {
+         (Push, Done)
+     };
+     self.insert_element(push, ns, tag.name, tag.attrs);
+     outcome
+ }
+
+ /// Replace the all-lowercase spelling of an SVG element name with its
+ /// mixed-case spelling, for the names listed in the table below. Any other
+ /// name is left untouched.
+ fn adjust_svg_tag_name(&mut self, tag: &mut Tag) {
+ let Tag { ref mut name, .. } = *tag;
+ match *name {
+ local_name!("altglyph") => *name = local_name!("altGlyph"),
+ local_name!("altglyphdef") => *name = local_name!("altGlyphDef"),
+ local_name!("altglyphitem") => *name = local_name!("altGlyphItem"),
+ local_name!("animatecolor") => *name = local_name!("animateColor"),
+ local_name!("animatemotion") => *name = local_name!("animateMotion"),
+ local_name!("animatetransform") => *name = local_name!("animateTransform"),
+ local_name!("clippath") => *name = local_name!("clipPath"),
+ local_name!("feblend") => *name = local_name!("feBlend"),
+ local_name!("fecolormatrix") => *name = local_name!("feColorMatrix"),
+ local_name!("fecomponenttransfer") => *name = local_name!("feComponentTransfer"),
+ local_name!("fecomposite") => *name = local_name!("feComposite"),
+ local_name!("feconvolvematrix") => *name = local_name!("feConvolveMatrix"),
+ local_name!("fediffuselighting") => *name = local_name!("feDiffuseLighting"),
+ local_name!("fedisplacementmap") => *name = local_name!("feDisplacementMap"),
+ local_name!("fedistantlight") => *name = local_name!("feDistantLight"),
+ local_name!("fedropshadow") => *name = local_name!("feDropShadow"),
+ local_name!("feflood") => *name = local_name!("feFlood"),
+ local_name!("fefunca") => *name = local_name!("feFuncA"),
+ local_name!("fefuncb") => *name = local_name!("feFuncB"),
+ local_name!("fefuncg") => *name = local_name!("feFuncG"),
+ local_name!("fefuncr") => *name = local_name!("feFuncR"),
+ local_name!("fegaussianblur") => *name = local_name!("feGaussianBlur"),
+ local_name!("feimage") => *name = local_name!("feImage"),
+ local_name!("femerge") => *name = local_name!("feMerge"),
+ local_name!("femergenode") => *name = local_name!("feMergeNode"),
+ local_name!("femorphology") => *name = local_name!("feMorphology"),
+ local_name!("feoffset") => *name = local_name!("feOffset"),
+ local_name!("fepointlight") => *name = local_name!("fePointLight"),
+ local_name!("fespecularlighting") => *name = local_name!("feSpecularLighting"),
+ local_name!("fespotlight") => *name = local_name!("feSpotLight"),
+ local_name!("fetile") => *name = local_name!("feTile"),
+ local_name!("feturbulence") => *name = local_name!("feTurbulence"),
+ local_name!("foreignobject") => *name = local_name!("foreignObject"),
+ local_name!("glyphref") => *name = local_name!("glyphRef"),
+ local_name!("lineargradient") => *name = local_name!("linearGradient"),
+ local_name!("radialgradient") => *name = local_name!("radialGradient"),
+ local_name!("textpath") => *name = local_name!("textPath"),
+ _ => (),
+ }
+ }
+
+ /// Rewrite the names of `tag`'s attributes in place.
+ ///
+ /// `map` is called with a clone of each attribute's local name; when it
+ /// returns `Some(qualified_name)` the attribute's whole name is replaced
+ /// by it, and on `None` the attribute is left as it is.
+ fn adjust_attributes<F>(&mut self, tag: &mut Tag, mut map: F)
+ where
+     F: FnMut(LocalName) -> Option<QualName>,
+ {
+     for attr in tag.attrs.iter_mut() {
+         if let Some(replacement) = map(attr.name.local.clone()) {
+             attr.name = replacement;
+         }
+     }
+ }
+
+ /// Replace the all-lowercase spelling of an SVG attribute name with its
+ /// mixed-case, namespace-less qualified name, for the names listed in the
+ /// table below. Attributes with any other local name are left untouched.
+ fn adjust_svg_attributes(&mut self, tag: &mut Tag) {
+ self.adjust_attributes(tag, |k| match k {
+ local_name!("attributename") => Some(qualname!("", "attributeName")),
+ local_name!("attributetype") => Some(qualname!("", "attributeType")),
+ local_name!("basefrequency") => Some(qualname!("", "baseFrequency")),
+ local_name!("baseprofile") => Some(qualname!("", "baseProfile")),
+ local_name!("calcmode") => Some(qualname!("", "calcMode")),
+ local_name!("clippathunits") => Some(qualname!("", "clipPathUnits")),
+ local_name!("diffuseconstant") => Some(qualname!("", "diffuseConstant")),
+ local_name!("edgemode") => Some(qualname!("", "edgeMode")),
+ local_name!("filterunits") => Some(qualname!("", "filterUnits")),
+ local_name!("glyphref") => Some(qualname!("", "glyphRef")),
+ local_name!("gradienttransform") => Some(qualname!("", "gradientTransform")),
+ local_name!("gradientunits") => Some(qualname!("", "gradientUnits")),
+ local_name!("kernelmatrix") => Some(qualname!("", "kernelMatrix")),
+ local_name!("kernelunitlength") => Some(qualname!("", "kernelUnitLength")),
+ local_name!("keypoints") => Some(qualname!("", "keyPoints")),
+ local_name!("keysplines") => Some(qualname!("", "keySplines")),
+ local_name!("keytimes") => Some(qualname!("", "keyTimes")),
+ local_name!("lengthadjust") => Some(qualname!("", "lengthAdjust")),
+ local_name!("limitingconeangle") => Some(qualname!("", "limitingConeAngle")),
+ local_name!("markerheight") => Some(qualname!("", "markerHeight")),
+ local_name!("markerunits") => Some(qualname!("", "markerUnits")),
+ local_name!("markerwidth") => Some(qualname!("", "markerWidth")),
+ local_name!("maskcontentunits") => Some(qualname!("", "maskContentUnits")),
+ local_name!("maskunits") => Some(qualname!("", "maskUnits")),
+ local_name!("numoctaves") => Some(qualname!("", "numOctaves")),
+ local_name!("pathlength") => Some(qualname!("", "pathLength")),
+ local_name!("patterncontentunits") => Some(qualname!("", "patternContentUnits")),
+ local_name!("patterntransform") => Some(qualname!("", "patternTransform")),
+ local_name!("patternunits") => Some(qualname!("", "patternUnits")),
+ local_name!("pointsatx") => Some(qualname!("", "pointsAtX")),
+ local_name!("pointsaty") => Some(qualname!("", "pointsAtY")),
+ local_name!("pointsatz") => Some(qualname!("", "pointsAtZ")),
+ local_name!("preservealpha") => Some(qualname!("", "preserveAlpha")),
+ local_name!("preserveaspectratio") => Some(qualname!("", "preserveAspectRatio")),
+ local_name!("primitiveunits") => Some(qualname!("", "primitiveUnits")),
+ local_name!("refx") => Some(qualname!("", "refX")),
+ local_name!("refy") => Some(qualname!("", "refY")),
+ local_name!("repeatcount") => Some(qualname!("", "repeatCount")),
+ local_name!("repeatdur") => Some(qualname!("", "repeatDur")),
+ local_name!("requiredextensions") => Some(qualname!("", "requiredExtensions")),
+ local_name!("requiredfeatures") => Some(qualname!("", "requiredFeatures")),
+ local_name!("specularconstant") => Some(qualname!("", "specularConstant")),
+ local_name!("specularexponent") => Some(qualname!("", "specularExponent")),
+ local_name!("spreadmethod") => Some(qualname!("", "spreadMethod")),
+ local_name!("startoffset") => Some(qualname!("", "startOffset")),
+ local_name!("stddeviation") => Some(qualname!("", "stdDeviation")),
+ local_name!("stitchtiles") => Some(qualname!("", "stitchTiles")),
+ local_name!("surfacescale") => Some(qualname!("", "surfaceScale")),
+ local_name!("systemlanguage") => Some(qualname!("", "systemLanguage")),
+ local_name!("tablevalues") => Some(qualname!("", "tableValues")),
+ local_name!("targetx") => Some(qualname!("", "targetX")),
+ local_name!("targety") => Some(qualname!("", "targetY")),
+ local_name!("textlength") => Some(qualname!("", "textLength")),
+ local_name!("viewbox") => Some(qualname!("", "viewBox")),
+ local_name!("viewtarget") => Some(qualname!("", "viewTarget")),
+ local_name!("xchannelselector") => Some(qualname!("", "xChannelSelector")),
+ local_name!("ychannelselector") => Some(qualname!("", "yChannelSelector")),
+ local_name!("zoomandpan") => Some(qualname!("", "zoomAndPan")),
+ _ => None,
+ });
+ }
+
+ /// Rename a `definitionurl` attribute on `tag` to the namespace-less
+ /// qualified name `definitionURL`; all other attributes are left as they are.
+ fn adjust_mathml_attributes(&mut self, tag: &mut Tag) {
+     self.adjust_attributes(tag, |k| {
+         if k == local_name!("definitionurl") {
+             Some(qualname!("", "definitionURL"))
+         } else {
+             None
+         }
+     });
+ }
+
+ /// Rewrite attributes whose local name is one of the prefixed spellings
+ /// listed below (`xlink:*`, `xml:*`, `xmlns`, `xmlns:xlink`) into the
+ /// corresponding qualified name built by `qualname!`. Attributes with any
+ /// other local name are left untouched.
+ fn adjust_foreign_attributes(&mut self, tag: &mut Tag) {
+ self.adjust_attributes(tag, |k| match k {
+ local_name!("xlink:actuate") => Some(qualname!("xlink" xlink "actuate")),
+ local_name!("xlink:arcrole") => Some(qualname!("xlink" xlink "arcrole")),
+ local_name!("xlink:href") => Some(qualname!("xlink" xlink "href")),
+ local_name!("xlink:role") => Some(qualname!("xlink" xlink "role")),
+ local_name!("xlink:show") => Some(qualname!("xlink" xlink "show")),
+ local_name!("xlink:title") => Some(qualname!("xlink" xlink "title")),
+ local_name!("xlink:type") => Some(qualname!("xlink" xlink "type")),
+ local_name!("xml:base") => Some(qualname!("xml" xml "base")),
+ local_name!("xml:lang") => Some(qualname!("xml" xml "lang")),
+ local_name!("xml:space") => Some(qualname!("xml" xml "space")),
+ local_name!("xmlns") => Some(qualname!("" xmlns "xmlns")),
+ local_name!("xmlns:xlink") => Some(qualname!("xmlns" xmlns "xlink")),
+ _ => None,
+ });
+ }
+
+ /// Insert an element for a start tag seen in foreign content, using the
+ /// namespace of the adjusted current node.
+ ///
+ /// MathML nodes get the MathML attribute adjustments; SVG nodes get both
+ /// the tag-name and attribute adjustments. The foreign attribute
+ /// adjustments are applied in every case. A self-closing tag is inserted
+ /// without being pushed and yields `DoneAckSelfClosing`; any other tag is
+ /// pushed onto the stack of open elements and yields `Done`.
+ fn foreign_start_tag(&mut self, mut tag: Tag) -> ProcessResult<Handle> {
+     let current_ns = self.sink.elem_name(self.adjusted_current_node()).ns.clone();
+     match current_ns {
+         ns!(mathml) => self.adjust_mathml_attributes(&mut tag),
+         ns!(svg) => {
+             self.adjust_svg_tag_name(&mut tag);
+             self.adjust_svg_attributes(&mut tag);
+         },
+         _ => (),
+     }
+     self.adjust_foreign_attributes(&mut tag);
+
+     let (push, outcome) = if tag.self_closing {
+         // FIXME(#118): <script /> in SVG
+         (NoPush, DoneAckSelfClosing)
+     } else {
+         (Push, Done)
+     };
+     self.insert_element(push, current_ns, tag.name, tag.attrs);
+     outcome
+ }
+
+ /// Handle a start tag that is not allowed in foreign content.
+ ///
+ /// The tag is always reported as unexpected. In the fragment case it is
+ /// then processed by `foreign_start_tag`. Otherwise elements are popped
+ /// (at least one) until the current node is in the HTML namespace, a MathML
+ /// text integration point, or an SVG HTML integration point, and the tag is
+ /// handed back through `ReprocessForeign`.
+ fn unexpected_start_tag_in_foreign_content(&mut self, tag: Tag) -> ProcessResult<Handle> {
+     self.unexpected(&tag);
+     if self.is_fragment() {
+         return self.foreign_start_tag(tag);
+     }
+
+     loop {
+         self.pop();
+         let reached_html_context = self.current_node_in(|n| {
+             *n.ns == ns!(html) ||
+                 mathml_text_integration_point(n) ||
+                 svg_html_integration_point(n)
+         });
+         if reached_html_context {
+             break;
+         }
+     }
+     ReprocessForeign(TagToken(tag))
+ }
+}
diff --git a/src/tree_builder/rules.rs b/src/tree_builder/rules.rs
new file mode 100644
index 0000000..bdc8afd
--- /dev/null
+++ b/src/tree_builder/rules.rs
@@ -0,0 +1,1449 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// The tree builder rules, as a single, enormous nested match expression.
+
+use markup5ever::{expanded_name, local_name, namespace_prefix, namespace_url, ns};
+use crate::tokenizer::states::{Plaintext, Rawtext, Rcdata, ScriptData};
+use crate::tree_builder::tag_sets::*;
+use crate::tree_builder::types::*;
+
+use std::borrow::ToOwned;
+
+use crate::tendril::SliceExt;
+
+/// Return `true` when `x` contains at least one character for which
+/// `is_ascii_whitespace` is false. An empty tendril yields `false`.
+fn any_not_whitespace(x: &StrTendril) -> bool {
+    // FIXME: this might be much faster as a byte scan
+    !x.chars().all(|c| is_ascii_whitespace(c))
+}
+
+/// Return the last entry of `open_elems`, i.e. the current node.
+///
+/// Panics with "no current element" when the slice is empty.
+fn current_node<Handle>(open_elems: &[Handle]) -> &Handle {
+ open_elems.last().expect("no current element")
+}
+
+#[doc(hidden)]
+impl<Handle, Sink> TreeBuilder<Handle, Sink>
+where
+ Handle: Clone,
+ Sink: TreeSink<Handle = Handle>,
+{
+ fn step(&mut self, mode: InsertionMode, token: Token) -> ProcessResult<Handle> {
+ self.debug_step(mode, &token);
+
+ match mode {
+ //§ the-initial-insertion-mode
+ Initial => match_token!(token {
+ CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+ CharacterTokens(Whitespace, _) => Done,
+ CommentToken(text) => self.append_comment_to_doc(text),
+ token => {
+ if !self.opts.iframe_srcdoc {
+ self.unexpected(&token);
+ self.set_quirks_mode(Quirks);
+ }
+ Reprocess(BeforeHtml, token)
+ }
+ }),
+
+ //§ the-before-html-insertion-mode
+ BeforeHtml => match_token!(token {
+ CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+ CharacterTokens(Whitespace, _) => Done,
+ CommentToken(text) => self.append_comment_to_doc(text),
+
+ tag @ <html> => {
+ self.create_root(tag.attrs);
+ self.mode = BeforeHead;
+ Done
+ }
+
+ </head> </body> </html> </br> => else,
+
+ tag @ </_> => self.unexpected(&tag),
+
+ token => {
+ self.create_root(vec!());
+ Reprocess(BeforeHead, token)
+ }
+ }),
+
+ //§ the-before-head-insertion-mode
+ BeforeHead => match_token!(token {
+ CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+ CharacterTokens(Whitespace, _) => Done,
+ CommentToken(text) => self.append_comment(text),
+
+ <html> => self.step(InBody, token),
+
+ tag @ <head> => {
+ self.head_elem = Some(self.insert_element_for(tag));
+ self.mode = InHead;
+ Done
+ }
+
+ </head> </body> </html> </br> => else,
+
+ tag @ </_> => self.unexpected(&tag),
+
+ token => {
+ self.head_elem = Some(self.insert_phantom(local_name!("head")));
+ Reprocess(InHead, token)
+ }
+ }),
+
+ //§ parsing-main-inhead
+ InHead => match_token!(token {
+ CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+ CharacterTokens(Whitespace, text) => self.append_text(text),
+ CommentToken(text) => self.append_comment(text),
+
+ <html> => self.step(InBody, token),
+
+ tag @ <base> <basefont> <bgsound> <link> <meta> => {
+ // FIXME: handle <meta charset=...> and <meta http-equiv="Content-Type">
+ self.insert_and_pop_element_for(tag);
+ DoneAckSelfClosing
+ }
+
+ tag @ <title> => {
+ self.parse_raw_data(tag, Rcdata)
+ }
+
+ tag @ <noframes> <style> <noscript> => {
+ if (!self.opts.scripting_enabled) && (tag.name == local_name!("noscript")) {
+ self.insert_element_for(tag);
+ self.mode = InHeadNoscript;
+ Done
+ } else {
+ self.parse_raw_data(tag, Rawtext)
+ }
+ }
+
+ tag @ <script> => {
+ let elem = create_element(
+ &mut self.sink, QualName::new(None, ns!(html), local_name!("script")),
+ tag.attrs);
+ if self.is_fragment() {
+ self.sink.mark_script_already_started(&elem);
+ }
+ self.insert_appropriately(AppendNode(elem.clone()), None);
+ self.open_elems.push(elem);
+ self.to_raw_text_mode(ScriptData)
+ }
+
+ </head> => {
+ self.pop();
+ self.mode = AfterHead;
+ Done
+ }
+
+ </body> </html> </br> => else,
+
+ tag @ <template> => {
+ self.insert_element_for(tag);
+ self.active_formatting.push(Marker);
+ self.frameset_ok = false;
+ self.mode = InTemplate;
+ self.template_modes.push(InTemplate);
+ Done
+ }
+
+ tag @ </template> => {
+ if !self.in_html_elem_named(local_name!("template")) {
+ self.unexpected(&tag);
+ } else {
+ self.generate_implied_end(thorough_implied_end);
+ self.expect_to_close(local_name!("template"));
+ self.clear_active_formatting_to_marker();
+ self.template_modes.pop();
+ self.mode = self.reset_insertion_mode();
+ }
+ Done
+ }
+
+ <head> => self.unexpected(&token),
+ tag @ </_> => self.unexpected(&tag),
+
+ token => {
+ self.pop();
+ Reprocess(AfterHead, token)
+ }
+ }),
+
+ //§ parsing-main-inheadnoscript
+ InHeadNoscript => match_token!(token {
+ <html> => self.step(InBody, token),
+
+ </noscript> => {
+ self.pop();
+ self.mode = InHead;
+ Done
+ },
+
+ CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+ CharacterTokens(Whitespace, _) => self.step(InHead, token),
+
+ CommentToken(_) => self.step(InHead, token),
+
+ <basefont> <bgsound> <link> <meta> <noframes> <style>
+ => self.step(InHead, token),
+
+ </br> => else,
+
+ <head> <noscript> => self.unexpected(&token),
+ tag @ </_> => self.unexpected(&tag),
+
+ token => {
+ self.unexpected(&token);
+ self.pop();
+ Reprocess(InHead, token)
+ },
+ }),
+
+ //§ the-after-head-insertion-mode
+ AfterHead => match_token!(token {
+ CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+ CharacterTokens(Whitespace, text) => self.append_text(text),
+ CommentToken(text) => self.append_comment(text),
+
+ <html> => self.step(InBody, token),
+
+ tag @ <body> => {
+ self.insert_element_for(tag);
+ self.frameset_ok = false;
+ self.mode = InBody;
+ Done
+ }
+
+ tag @ <frameset> => {
+ self.insert_element_for(tag);
+ self.mode = InFrameset;
+ Done
+ }
+
+ <base> <basefont> <bgsound> <link> <meta>
+ <noframes> <script> <style> <template> <title> => {
+ self.unexpected(&token);
+ let head = self.head_elem.as_ref().expect("no head element").clone();
+ self.push(&head);
+ let result = self.step(InHead, token);
+ self.remove_from_stack(&head);
+ result
+ }
+
+ </template> => self.step(InHead, token),
+
+ </body> </html> </br> => else,
+
+ <head> => self.unexpected(&token),
+ tag @ </_> => self.unexpected(&tag),
+
+ token => {
+ self.insert_phantom(local_name!("body"));
+ Reprocess(InBody, token)
+ }
+ }),
+
+ //§ parsing-main-inbody
+ InBody => match_token!(token {
+ NullCharacterToken => self.unexpected(&token),
+
+ CharacterTokens(_, text) => {
+ self.reconstruct_formatting();
+ if any_not_whitespace(&text) {
+ self.frameset_ok = false;
+ }
+ self.append_text(text)
+ }
+
+ CommentToken(text) => self.append_comment(text),
+
+ tag @ <html> => {
+ self.unexpected(&tag);
+ if !self.in_html_elem_named(local_name!("template")) {
+ let top = html_elem(&self.open_elems);
+ self.sink.add_attrs_if_missing(top, tag.attrs);
+ }
+ Done
+ }
+
+ <base> <basefont> <bgsound> <link> <meta> <noframes>
+ <script> <style> <template> <title> </template> => {
+ self.step(InHead, token)
+ }
+
+ tag @ <body> => {
+ self.unexpected(&tag);
+ match self.body_elem().cloned() {
+ Some(ref node) if self.open_elems.len() != 1 &&
+ !self.in_html_elem_named(local_name!("template")) => {
+ self.frameset_ok = false;
+ self.sink.add_attrs_if_missing(node, tag.attrs)
+ },
+ _ => {}
+ }
+ Done
+ }
+
+ tag @ <frameset> => {
+ self.unexpected(&tag);
+ if !self.frameset_ok { return Done; }
+
+ let body = unwrap_or_return!(self.body_elem(), Done).clone();
+ self.sink.remove_from_parent(&body);
+
+ // FIXME: can we get here in the fragment case?
+ // What to do with the first element then?
+ self.open_elems.truncate(1);
+ self.insert_element_for(tag);
+ self.mode = InFrameset;
+ Done
+ }
+
+ EOFToken => {
+ if !self.template_modes.is_empty() {
+ self.step(InTemplate, token)
+ } else {
+ self.check_body_end();
+ self.stop_parsing()
+ }
+ }
+
+ </body> => {
+ if self.in_scope_named(default_scope, local_name!("body")) {
+ self.check_body_end();
+ self.mode = AfterBody;
+ } else {
+ self.sink.parse_error(Borrowed("</body> with no <body> in scope"));
+ }
+ Done
+ }
+
+ </html> => {
+ if self.in_scope_named(default_scope, local_name!("body")) {
+ self.check_body_end();
+ Reprocess(AfterBody, token)
+ } else {
+ self.sink.parse_error(Borrowed("</html> with no <body> in scope"));
+ Done
+ }
+ }
+
+ tag @ <address> <article> <aside> <blockquote> <center> <details> <dialog>
+ <dir> <div> <dl> <fieldset> <figcaption> <figure> <footer> <header>
+ <hgroup> <main> <nav> <ol> <p> <section> <summary> <ul> => {
+ self.close_p_element_in_button_scope();
+ self.insert_element_for(tag);
+ Done
+ }
+
+ tag @ <menu> => {
+ self.close_p_element_in_button_scope();
+ self.insert_element_for(tag);
+ Done
+ }
+
+ tag @ <h1> <h2> <h3> <h4> <h5> <h6> => {
+ self.close_p_element_in_button_scope();
+ if self.current_node_in(heading_tag) {
+ self.sink.parse_error(Borrowed("nested heading tags"));
+ self.pop();
+ }
+ self.insert_element_for(tag);
+ Done
+ }
+
+ tag @ <pre> <listing> => {
+ self.close_p_element_in_button_scope();
+ self.insert_element_for(tag);
+ self.ignore_lf = true;
+ self.frameset_ok = false;
+ Done
+ }
+
+ tag @ <form> => {
+ if self.form_elem.is_some() &&
+ !self.in_html_elem_named(local_name!("template")) {
+ self.sink.parse_error(Borrowed("nested forms"));
+ } else {
+ self.close_p_element_in_button_scope();
+ let elem = self.insert_element_for(tag);
+ if !self.in_html_elem_named(local_name!("template")) {
+ self.form_elem = Some(elem);
+ }
+ }
+ Done
+ }
+
+ tag @ <li> <dd> <dt> => {
+ declare_tag_set!(close_list = "li");
+ declare_tag_set!(close_defn = "dd" "dt");
+ declare_tag_set!(extra_special = [special_tag] - "address" "div" "p");
+ let list = match tag.name {
+ local_name!("li") => true,
+ local_name!("dd") | local_name!("dt") => false,
+ _ => unreachable!(),
+ };
+
+ self.frameset_ok = false;
+
+ let mut to_close = None;
+ for node in self.open_elems.iter().rev() {
+ let name = self.sink.elem_name(node);
+ let can_close = if list {
+ close_list(name)
+ } else {
+ close_defn(name)
+ };
+ if can_close {
+ to_close = Some(name.local.clone());
+ break;
+ }
+ if extra_special(name) {
+ break;
+ }
+ }
+
+ match to_close {
+ Some(name) => {
+ self.generate_implied_end_except(name.clone());
+ self.expect_to_close(name);
+ }
+ None => (),
+ }
+
+ self.close_p_element_in_button_scope();
+ self.insert_element_for(tag);
+ Done
+ }
+
+ tag @ <plaintext> => {
+ self.close_p_element_in_button_scope();
+ self.insert_element_for(tag);
+ ToPlaintext
+ }
+
+ tag @ <button> => {
+ if self.in_scope_named(default_scope, local_name!("button")) {
+ self.sink.parse_error(Borrowed("nested buttons"));
+ self.generate_implied_end(cursory_implied_end);
+ self.pop_until_named(local_name!("button"));
+ }
+ self.reconstruct_formatting();
+ self.insert_element_for(tag);
+ self.frameset_ok = false;
+ Done
+ }
+
+ tag @ </address> </article> </aside> </blockquote> </button> </center>
+ </details> </dialog> </dir> </div> </dl> </fieldset> </figcaption>
+ </figure> </footer> </header> </hgroup> </listing> </main> </menu>
+ </nav> </ol> </pre> </section> </summary> </ul> => {
+ if !self.in_scope_named(default_scope, tag.name.clone()) {
+ self.unexpected(&tag);
+ } else {
+ self.generate_implied_end(cursory_implied_end);
+ self.expect_to_close(tag.name);
+ }
+ Done
+ }
+
+ </form> => {
+ if !self.in_html_elem_named(local_name!("template")) {
+ // Can't use unwrap_or_return!() due to rust-lang/rust#16617.
+ let node = match self.form_elem.take() {
+ None => {
+ self.sink.parse_error(Borrowed("Null form element pointer on </form>"));
+ return Done;
+ }
+ Some(x) => x,
+ };
+ if !self.in_scope(default_scope, |n| self.sink.same_node(&node, &n)) {
+ self.sink.parse_error(Borrowed("Form element not in scope on </form>"));
+ return Done;
+ }
+ self.generate_implied_end(cursory_implied_end);
+ let current = self.current_node().clone();
+ self.remove_from_stack(&node);
+ if !self.sink.same_node(&current, &node) {
+ self.sink.parse_error(Borrowed("Bad open element on </form>"));
+ }
+ } else {
+ if !self.in_scope_named(default_scope, local_name!("form")) {
+ self.sink.parse_error(Borrowed("Form element not in scope on </form>"));
+ return Done;
+ }
+ self.generate_implied_end(cursory_implied_end);
+ if !self.current_node_named(local_name!("form")) {
+ self.sink.parse_error(Borrowed("Bad open element on </form>"));
+ }
+ self.pop_until_named(local_name!("form"));
+ }
+ Done
+ }
+
+ </p> => {
+ if !self.in_scope_named(button_scope, local_name!("p")) {
+ self.sink.parse_error(Borrowed("No <p> tag to close"));
+ self.insert_phantom(local_name!("p"));
+ }
+ self.close_p_element();
+ Done
+ }
+
+ tag @ </li> </dd> </dt> => {
+ let in_scope = if tag.name == local_name!("li") {
+ self.in_scope_named(list_item_scope, tag.name.clone())
+ } else {
+ self.in_scope_named(default_scope, tag.name.clone())
+ };
+ if in_scope {
+ self.generate_implied_end_except(tag.name.clone());
+ self.expect_to_close(tag.name);
+ } else {
+ self.sink.parse_error(Borrowed("No matching tag to close"));
+ }
+ Done
+ }
+
+ tag @ </h1> </h2> </h3> </h4> </h5> </h6> => {
+ if self.in_scope(default_scope, |n| self.elem_in(&n, heading_tag)) {
+ self.generate_implied_end(cursory_implied_end);
+ if !self.current_node_named(tag.name) {
+ self.sink.parse_error(Borrowed("Closing wrong heading tag"));
+ }
+ self.pop_until(heading_tag);
+ } else {
+ self.sink.parse_error(Borrowed("No heading tag to close"));
+ }
+ Done
+ }
+
+ tag @ <a> => {
+ self.handle_misnested_a_tags(&tag);
+ self.reconstruct_formatting();
+ self.create_formatting_element_for(tag);
+ Done
+ }
+
+ tag @ <b> <big> <code> <em> <font> <i> <s> <small> <strike> <strong> <tt> <u> => {
+ self.reconstruct_formatting();
+ self.create_formatting_element_for(tag);
+ Done
+ }
+
+ tag @ <nobr> => {
+ self.reconstruct_formatting();
+ if self.in_scope_named(default_scope, local_name!("nobr")) {
+ self.sink.parse_error(Borrowed("Nested <nobr>"));
+ self.adoption_agency(local_name!("nobr"));
+ self.reconstruct_formatting();
+ }
+ self.create_formatting_element_for(tag);
+ Done
+ }
+
+ tag @ </a> </b> </big> </code> </em> </font> </i> </nobr>
+ </s> </small> </strike> </strong> </tt> </u> => {
+ self.adoption_agency(tag.name);
+ Done
+ }
+
+ tag @ <applet> <marquee> <object> => {
+ self.reconstruct_formatting();
+ self.insert_element_for(tag);
+ self.active_formatting.push(Marker);
+ self.frameset_ok = false;
+ Done
+ }
+
+ tag @ </applet> </marquee> </object> => {
+ if !self.in_scope_named(default_scope, tag.name.clone()) {
+ self.unexpected(&tag);
+ } else {
+ self.generate_implied_end(cursory_implied_end);
+ self.expect_to_close(tag.name);
+ self.clear_active_formatting_to_marker();
+ }
+ Done
+ }
+
+ tag @ <table> => {
+ if self.quirks_mode != Quirks {
+ self.close_p_element_in_button_scope();
+ }
+ self.insert_element_for(tag);
+ self.frameset_ok = false;
+ self.mode = InTable;
+ Done
+ }
+
+ tag @ </br> => {
+ self.unexpected(&tag);
+ self.step(InBody, TagToken(Tag {
+ kind: StartTag,
+ attrs: vec!(),
+ ..tag
+ }))
+ }
+
+ tag @ <area> <br> <embed> <img> <keygen> <wbr> <input> => {
+ let keep_frameset_ok = match tag.name {
+ local_name!("input") => self.is_type_hidden(&tag),
+ _ => false,
+ };
+ self.reconstruct_formatting();
+ self.insert_and_pop_element_for(tag);
+ if !keep_frameset_ok {
+ self.frameset_ok = false;
+ }
+ DoneAckSelfClosing
+ }
+
+ tag @ <param> <source> <track> => {
+ self.insert_and_pop_element_for(tag);
+ DoneAckSelfClosing
+ }
+
+ tag @ <hr> => {
+ self.close_p_element_in_button_scope();
+ self.insert_and_pop_element_for(tag);
+ self.frameset_ok = false;
+ DoneAckSelfClosing
+ }
+
+ tag @ <image> => {
+ self.unexpected(&tag);
+ self.step(InBody, TagToken(Tag {
+ name: local_name!("img"),
+ ..tag
+ }))
+ }
+
+ tag @ <textarea> => {
+ self.ignore_lf = true;
+ self.frameset_ok = false;
+ self.parse_raw_data(tag, Rcdata)
+ }
+
+ tag @ <xmp> => {
+ self.close_p_element_in_button_scope();
+ self.reconstruct_formatting();
+ self.frameset_ok = false;
+ self.parse_raw_data(tag, Rawtext)
+ }
+
+ tag @ <iframe> => {
+ self.frameset_ok = false;
+ self.parse_raw_data(tag, Rawtext)
+ }
+
+ tag @ <noembed> => {
+ self.parse_raw_data(tag, Rawtext)
+ }
+
+ // <noscript> handled in wildcard case below
+
+ tag @ <select> => {
+ self.reconstruct_formatting();
+ self.insert_element_for(tag);
+ self.frameset_ok = false;
+ // NB: mode == InBody but possibly self.mode != mode, if
+ // we're processing "as in the rules for InBody".
+ self.mode = match self.mode {
+ InTable | InCaption | InTableBody
+ | InRow | InCell => InSelectInTable,
+ _ => InSelect,
+ };
+ Done
+ }
+
+ tag @ <optgroup> <option> => {
+ if self.current_node_named(local_name!("option")) {
+ self.pop();
+ }
+ self.reconstruct_formatting();
+ self.insert_element_for(tag);
+ Done
+ }
+
+ tag @ <rb> <rtc> => {
+ if self.in_scope_named(default_scope, local_name!("ruby")) {
+ self.generate_implied_end(cursory_implied_end);
+ }
+ if !self.current_node_named(local_name!("ruby")) {
+ self.unexpected(&tag);
+ }
+ self.insert_element_for(tag);
+ Done
+ }
+
+ tag @ <rp> <rt> => {
+ if self.in_scope_named(default_scope, local_name!("ruby")) {
+ self.generate_implied_end_except(local_name!("rtc"));
+ }
+ if !self.current_node_named(local_name!("rtc")) && !self.current_node_named(local_name!("ruby")) {
+ self.unexpected(&tag);
+ }
+ self.insert_element_for(tag);
+ Done
+ }
+
+ tag @ <math> => self.enter_foreign(tag, ns!(mathml)),
+
+ tag @ <svg> => self.enter_foreign(tag, ns!(svg)),
+
+ <caption> <col> <colgroup> <frame> <head>
+ <tbody> <td> <tfoot> <th> <thead> <tr> => {
+ self.unexpected(&token);
+ Done
+ }
+
+ tag @ <_> => {
+ if self.opts.scripting_enabled && tag.name == local_name!("noscript") {
+ self.parse_raw_data(tag, Rawtext)
+ } else {
+ self.reconstruct_formatting();
+ self.insert_element_for(tag);
+ Done
+ }
+ }
+
+ tag @ </_> => {
+ self.process_end_tag_in_body(tag);
+ Done
+ }
+
+ // FIXME: This should be unreachable, but match_token requires a
+ // catch-all case.
+ _ => panic!("impossible case in InBody mode"),
+ }),
+
+ //§ parsing-main-incdata
+ Text => match_token!(token {
+ CharacterTokens(_, text) => self.append_text(text),
+
+ EOFToken => {
+ self.unexpected(&token);
+ if self.current_node_named(local_name!("script")) {
+ let current = current_node(&self.open_elems);
+ self.sink.mark_script_already_started(current);
+ }
+ self.pop();
+ Reprocess(self.orig_mode.take().unwrap(), token)
+ }
+
+ tag @ </_> => {
+ let node = self.pop();
+ self.mode = self.orig_mode.take().unwrap();
+ if tag.name == local_name!("script") {
+ return Script(node);
+ }
+ Done
+ }
+
+ // The spec doesn't say what to do here.
+ // Other tokens are impossible?
+ _ => panic!("impossible case in Text mode"),
+ }),
+
+ //§ parsing-main-intable
+ InTable => match_token!(token {
+ // FIXME: hack, should implement pat | pat for match_token instead
+ NullCharacterToken => self.process_chars_in_table(token),
+
+ CharacterTokens(..) => self.process_chars_in_table(token),
+
+ CommentToken(text) => self.append_comment(text),
+
+ tag @ <caption> => {
+ self.pop_until_current(table_scope);
+ self.active_formatting.push(Marker);
+ self.insert_element_for(tag);
+ self.mode = InCaption;
+ Done
+ }
+
+ tag @ <colgroup> => {
+ self.pop_until_current(table_scope);
+ self.insert_element_for(tag);
+ self.mode = InColumnGroup;
+ Done
+ }
+
+ <col> => {
+ self.pop_until_current(table_scope);
+ self.insert_phantom(local_name!("colgroup"));
+ Reprocess(InColumnGroup, token)
+ }
+
+ tag @ <tbody> <tfoot> <thead> => {
+ self.pop_until_current(table_scope);
+ self.insert_element_for(tag);
+ self.mode = InTableBody;
+ Done
+ }
+
+ <td> <th> <tr> => {
+ self.pop_until_current(table_scope);
+ self.insert_phantom(local_name!("tbody"));
+ Reprocess(InTableBody, token)
+ }
+
+ <table> => {
+ self.unexpected(&token);
+ if self.in_scope_named(table_scope, local_name!("table")) {
+ self.pop_until_named(local_name!("table"));
+ Reprocess(self.reset_insertion_mode(), token)
+ } else {
+ Done
+ }
+ }
+
+ </table> => {
+ if self.in_scope_named(table_scope, local_name!("table")) {
+ self.pop_until_named(local_name!("table"));
+ self.mode = self.reset_insertion_mode();
+ } else {
+ self.unexpected(&token);
+ }
+ Done
+ }
+
+ </body> </caption> </col> </colgroup> </html>
+ </tbody> </td> </tfoot> </th> </thead> </tr> =>
+ self.unexpected(&token),
+
+ <style> <script> <template> </template>
+ => self.step(InHead, token),
+
+ tag @ <input> => {
+ self.unexpected(&tag);
+ if self.is_type_hidden(&tag) {
+ self.insert_and_pop_element_for(tag);
+ DoneAckSelfClosing
+ } else {
+ self.foster_parent_in_body(TagToken(tag))
+ }
+ }
+
+ tag @ <form> => {
+ self.unexpected(&tag);
+ if !self.in_html_elem_named(local_name!("template")) && self.form_elem.is_none() {
+ self.form_elem = Some(self.insert_and_pop_element_for(tag));
+ }
+ Done
+ }
+
+ EOFToken => self.step(InBody, token),
+
+ token => {
+ self.unexpected(&token);
+ self.foster_parent_in_body(token)
+ }
+ }),
+
+ //§ parsing-main-intabletext
+ InTableText => match_token!(token {
+ NullCharacterToken => self.unexpected(&token),
+
+ CharacterTokens(split, text) => {
+ self.pending_table_text.push((split, text));
+ Done
+ }
+
+ token => {
+ let pending = replace(&mut self.pending_table_text, vec!());
+ let contains_nonspace = pending.iter().any(|&(split, ref text)| {
+ match split {
+ Whitespace => false,
+ NotWhitespace => true,
+ NotSplit => any_not_whitespace(text),
+ }
+ });
+
+ if contains_nonspace {
+ self.sink.parse_error(Borrowed("Non-space table text"));
+ for (split, text) in pending.into_iter() {
+ match self.foster_parent_in_body(CharacterTokens(split, text)) {
+ Done => (),
+ _ => panic!("not prepared to handle this!"),
+ }
+ }
+ } else {
+ for (_, text) in pending.into_iter() {
+ self.append_text(text);
+ }
+ }
+
+ Reprocess(self.orig_mode.take().unwrap(), token)
+ }
+ }),
+
+ //§ parsing-main-incaption
+ InCaption => match_token!(token {
+ tag @ <caption> <col> <colgroup> <tbody> <td> <tfoot>
+ <th> <thead> <tr> </table> </caption> => {
+ if self.in_scope_named(table_scope, local_name!("caption")) {
+ self.generate_implied_end(cursory_implied_end);
+ self.expect_to_close(local_name!("caption"));
+ self.clear_active_formatting_to_marker();
+ match tag {
+ Tag { kind: EndTag, name: local_name!("caption"), .. } => {
+ self.mode = InTable;
+ Done
+ }
+ _ => Reprocess(InTable, TagToken(tag))
+ }
+ } else {
+ self.unexpected(&tag);
+ Done
+ }
+ }
+
+ </body> </col> </colgroup> </html> </tbody>
+ </td> </tfoot> </th> </thead> </tr> => self.unexpected(&token),
+
+ token => self.step(InBody, token),
+ }),
+
+ //§ parsing-main-incolgroup
+ InColumnGroup => match_token!(token {
+ CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+ CharacterTokens(Whitespace, text) => self.append_text(text),
+ CommentToken(text) => self.append_comment(text),
+
+ <html> => self.step(InBody, token),
+
+ tag @ <col> => {
+ self.insert_and_pop_element_for(tag);
+ DoneAckSelfClosing
+ }
+
+ </colgroup> => {
+ if self.current_node_named(local_name!("colgroup")) {
+ self.pop();
+ self.mode = InTable;
+ } else {
+ self.unexpected(&token);
+ }
+ Done
+ }
+
+ </col> => self.unexpected(&token),
+
+ <template> </template> => self.step(InHead, token),
+
+ EOFToken => self.step(InBody, token),
+
+ token => {
+ if self.current_node_named(local_name!("colgroup")) {
+ self.pop();
+ Reprocess(InTable, token)
+ } else {
+ self.unexpected(&token)
+ }
+ }
+ }),
+
+ //§ parsing-main-intbody
+ InTableBody => match_token!(token {
+ tag @ <tr> => {
+ self.pop_until_current(table_body_context);
+ self.insert_element_for(tag);
+ self.mode = InRow;
+ Done
+ }
+
+ <th> <td> => {
+ self.unexpected(&token);
+ self.pop_until_current(table_body_context);
+ self.insert_phantom(local_name!("tr"));
+ Reprocess(InRow, token)
+ }
+
+ tag @ </tbody> </tfoot> </thead> => {
+ if self.in_scope_named(table_scope, tag.name.clone()) {
+ self.pop_until_current(table_body_context);
+ self.pop();
+ self.mode = InTable;
+ } else {
+ self.unexpected(&tag);
+ }
+ Done
+ }
+
+ <caption> <col> <colgroup> <tbody> <tfoot> <thead> </table> => {
+ declare_tag_set!(table_outer = "table" "tbody" "tfoot");
+ if self.in_scope(table_scope, |e| self.elem_in(&e, table_outer)) {
+ self.pop_until_current(table_body_context);
+ self.pop();
+ Reprocess(InTable, token)
+ } else {
+ self.unexpected(&token)
+ }
+ }
+
+ </body> </caption> </col> </colgroup> </html> </td> </th> </tr>
+ => self.unexpected(&token),
+
+ token => self.step(InTable, token),
+ }),
+
+ //§ parsing-main-intr
+ InRow => match_token!(token {
+ tag @ <th> <td> => {
+ self.pop_until_current(table_row_context);
+ self.insert_element_for(tag);
+ self.mode = InCell;
+ self.active_formatting.push(Marker);
+ Done
+ }
+
+ </tr> => {
+ if self.in_scope_named(table_scope, local_name!("tr")) {
+ self.pop_until_current(table_row_context);
+ let node = self.pop();
+ self.assert_named(&node, local_name!("tr"));
+ self.mode = InTableBody;
+ } else {
+ self.unexpected(&token);
+ }
+ Done
+ }
+
+ <caption> <col> <colgroup> <tbody> <tfoot> <thead> <tr> </table> => {
+ if self.in_scope_named(table_scope, local_name!("tr")) {
+ self.pop_until_current(table_row_context);
+ let node = self.pop();
+ self.assert_named(&node, local_name!("tr"));
+ Reprocess(InTableBody, token)
+ } else {
+ self.unexpected(&token)
+ }
+ }
+
+ tag @ </tbody> </tfoot> </thead> => {
+ if self.in_scope_named(table_scope, tag.name.clone()) {
+ if self.in_scope_named(table_scope, local_name!("tr")) {
+ self.pop_until_current(table_row_context);
+ let node = self.pop();
+ self.assert_named(&node, local_name!("tr"));
+ Reprocess(InTableBody, TagToken(tag))
+ } else {
+ Done
+ }
+ } else {
+ self.unexpected(&tag)
+ }
+ }
+
+ </body> </caption> </col> </colgroup> </html> </td> </th>
+ => self.unexpected(&token),
+
+ token => self.step(InTable, token),
+ }),
+
+ //§ parsing-main-intd
+ InCell => match_token!(token {
+ tag @ </td> </th> => {
+ if self.in_scope_named(table_scope, tag.name.clone()) {
+ self.generate_implied_end(cursory_implied_end);
+ self.expect_to_close(tag.name);
+ self.clear_active_formatting_to_marker();
+ self.mode = InRow;
+ } else {
+ self.unexpected(&tag);
+ }
+ Done
+ }
+
+ <caption> <col> <colgroup> <tbody> <td> <tfoot> <th> <thead> <tr> => {
+ if self.in_scope(table_scope, |n| self.elem_in(&n, td_th)) {
+ self.close_the_cell();
+ Reprocess(InRow, token)
+ } else {
+ self.unexpected(&token)
+ }
+ }
+
+ </body> </caption> </col> </colgroup> </html>
+ => self.unexpected(&token),
+
+ tag @ </table> </tbody> </tfoot> </thead> </tr> => {
+ if self.in_scope_named(table_scope, tag.name.clone()) {
+ self.close_the_cell();
+ Reprocess(InRow, TagToken(tag))
+ } else {
+ self.unexpected(&tag)
+ }
+ }
+
+ token => self.step(InBody, token),
+ }),
+
+ //§ parsing-main-inselect
+ InSelect => match_token!(token {
+ NullCharacterToken => self.unexpected(&token),
+ CharacterTokens(_, text) => self.append_text(text),
+ CommentToken(text) => self.append_comment(text),
+
+ <html> => self.step(InBody, token),
+
+ tag @ <option> => {
+ if self.current_node_named(local_name!("option")) {
+ self.pop();
+ }
+ self.insert_element_for(tag);
+ Done
+ }
+
+ tag @ <optgroup> => {
+ if self.current_node_named(local_name!("option")) {
+ self.pop();
+ }
+ if self.current_node_named(local_name!("optgroup")) {
+ self.pop();
+ }
+ self.insert_element_for(tag);
+ Done
+ }
+
+ </optgroup> => {
+ if self.open_elems.len() >= 2
+ && self.current_node_named(local_name!("option"))
+ && self.html_elem_named(&self.open_elems[self.open_elems.len() - 2],
+ local_name!("optgroup")) {
+ self.pop();
+ }
+ if self.current_node_named(local_name!("optgroup")) {
+ self.pop();
+ } else {
+ self.unexpected(&token);
+ }
+ Done
+ }
+
+ </option> => {
+ if self.current_node_named(local_name!("option")) {
+ self.pop();
+ } else {
+ self.unexpected(&token);
+ }
+ Done
+ }
+
+ tag @ <select> </select> => {
+ let in_scope = self.in_scope_named(select_scope, local_name!("select"));
+
+ if !in_scope || tag.kind == StartTag {
+ self.unexpected(&tag);
+ }
+
+ if in_scope {
+ self.pop_until_named(local_name!("select"));
+ self.mode = self.reset_insertion_mode();
+ }
+ Done
+ }
+
+ <input> <keygen> <textarea> => {
+ self.unexpected(&token);
+ if self.in_scope_named(select_scope, local_name!("select")) {
+ self.pop_until_named(local_name!("select"));
+ Reprocess(self.reset_insertion_mode(), token)
+ } else {
+ Done
+ }
+ }
+
+ <script> <template> </template> => self.step(InHead, token),
+
+ EOFToken => self.step(InBody, token),
+
+ token => self.unexpected(&token),
+ }),
+
+ //§ parsing-main-inselectintable
+ InSelectInTable => match_token!(token {
+ <caption> <table> <tbody> <tfoot> <thead> <tr> <td> <th> => {
+ self.unexpected(&token);
+ self.pop_until_named(local_name!("select"));
+ Reprocess(self.reset_insertion_mode(), token)
+ }
+
+ tag @ </caption> </table> </tbody> </tfoot> </thead> </tr> </td> </th> => {
+ self.unexpected(&tag);
+ if self.in_scope_named(table_scope, tag.name.clone()) {
+ self.pop_until_named(local_name!("select"));
+ Reprocess(self.reset_insertion_mode(), TagToken(tag))
+ } else {
+ Done
+ }
+ }
+
+ token => self.step(InSelect, token),
+ }),
+
+ //§ parsing-main-intemplate
+ InTemplate => match_token!(token {
+ CharacterTokens(_, _) => self.step(InBody, token),
+ CommentToken(_) => self.step(InBody, token),
+
+ <base> <basefont> <bgsound> <link> <meta> <noframes> <script>
+ <style> <template> <title> </template> => {
+ self.step(InHead, token)
+ }
+
+ <caption> <colgroup> <tbody> <tfoot> <thead> => {
+ self.template_modes.pop();
+ self.template_modes.push(InTable);
+ Reprocess(InTable, token)
+ }
+
+ <col> => {
+ self.template_modes.pop();
+ self.template_modes.push(InColumnGroup);
+ Reprocess(InColumnGroup, token)
+ }
+
+ <tr> => {
+ self.template_modes.pop();
+ self.template_modes.push(InTableBody);
+ Reprocess(InTableBody, token)
+ }
+
+ <td> <th> => {
+ self.template_modes.pop();
+ self.template_modes.push(InRow);
+ Reprocess(InRow, token)
+ }
+
+ EOFToken => {
+ if !self.in_html_elem_named(local_name!("template")) {
+ self.stop_parsing()
+ } else {
+ self.unexpected(&token);
+ self.pop_until_named(local_name!("template"));
+ self.clear_active_formatting_to_marker();
+ self.template_modes.pop();
+ self.mode = self.reset_insertion_mode();
+ Reprocess(self.reset_insertion_mode(), token)
+ }
+ }
+
+ tag @ <_> => {
+ self.template_modes.pop();
+ self.template_modes.push(InBody);
+ Reprocess(InBody, TagToken(tag))
+ }
+
+ token => self.unexpected(&token),
+ }),
+
+ //§ parsing-main-afterbody
+ AfterBody => match_token!(token {
+ CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+ CharacterTokens(Whitespace, _) => self.step(InBody, token),
+ CommentToken(text) => self.append_comment_to_html(text),
+
+ <html> => self.step(InBody, token),
+
+ </html> => {
+ if self.is_fragment() {
+ self.unexpected(&token);
+ } else {
+ self.mode = AfterAfterBody;
+ }
+ Done
+ }
+
+ EOFToken => self.stop_parsing(),
+
+ token => {
+ self.unexpected(&token);
+ Reprocess(InBody, token)
+ }
+ }),
+
+ //§ parsing-main-inframeset
+ InFrameset => match_token!(token {
+ CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+ CharacterTokens(Whitespace, text) => self.append_text(text),
+ CommentToken(text) => self.append_comment(text),
+
+ <html> => self.step(InBody, token),
+
+ tag @ <frameset> => {
+ self.insert_element_for(tag);
+ Done
+ }
+
+ </frameset> => {
+ if self.open_elems.len() == 1 {
+ self.unexpected(&token);
+ } else {
+ self.pop();
+ if !self.is_fragment() && !self.current_node_named(local_name!("frameset")) {
+ self.mode = AfterFrameset;
+ }
+ }
+ Done
+ }
+
+ tag @ <frame> => {
+ self.insert_and_pop_element_for(tag);
+ DoneAckSelfClosing
+ }
+
+ <noframes> => self.step(InHead, token),
+
+ EOFToken => {
+ if self.open_elems.len() != 1 {
+ self.unexpected(&token);
+ }
+ self.stop_parsing()
+ }
+
+ token => self.unexpected(&token),
+ }),
+
+ //§ parsing-main-afterframeset
+ AfterFrameset => match_token!(token {
+ CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+ CharacterTokens(Whitespace, text) => self.append_text(text),
+ CommentToken(text) => self.append_comment(text),
+
+ <html> => self.step(InBody, token),
+
+ </html> => {
+ self.mode = AfterAfterFrameset;
+ Done
+ }
+
+ <noframes> => self.step(InHead, token),
+
+ EOFToken => self.stop_parsing(),
+
+ token => self.unexpected(&token),
+ }),
+
+ //§ the-after-after-body-insertion-mode
+ AfterAfterBody => match_token!(token {
+ CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+ CharacterTokens(Whitespace, _) => self.step(InBody, token),
+ CommentToken(text) => self.append_comment_to_doc(text),
+
+ <html> => self.step(InBody, token),
+
+ EOFToken => self.stop_parsing(),
+
+ token => {
+ self.unexpected(&token);
+ Reprocess(InBody, token)
+ }
+ }),
+
+ //§ the-after-after-frameset-insertion-mode
+ AfterAfterFrameset => match_token!(token {
+ CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+ CharacterTokens(Whitespace, _) => self.step(InBody, token),
+ CommentToken(text) => self.append_comment_to_doc(text),
+
+ <html> => self.step(InBody, token),
+
+ EOFToken => self.stop_parsing(),
+
+ <noframes> => self.step(InHead, token),
+
+ token => self.unexpected(&token),
+ }),
+ //§ END
+ }
+ }
+
+    /// Handles one token according to the rules for foreign content, as
+    /// opposed to the per-insertion-mode rules implemented by `step`.
+    ///
+    /// Character data and comments are appended to the tree, start tags are
+    /// routed to `foreign_start_tag` or
+    /// `unexpected_start_tag_in_foreign_content`, and end tags are matched
+    /// against the stack of open elements. The returned `ProcessResult`
+    /// tells the caller what, if anything, remains to be done for the token.
+    fn step_foreign(&mut self, token: Token) -> ProcessResult<Handle> {
+        match_token!(token {
+            NullCharacterToken => {
+                // A NUL is reported and replaced by U+FFFD.
+                self.unexpected(&token);
+                self.append_text("\u{fffd}".to_tendril())
+            }
+
+            CharacterTokens(_, text) => {
+                // Any non-whitespace text rules out a later <frameset>.
+                if any_not_whitespace(&text) {
+                    self.frameset_ok = false;
+                }
+                self.append_text(text)
+            }
+
+            CommentToken(text) => self.append_comment(text),
+
+            tag @ <b> <big> <blockquote> <body> <br> <center> <code> <dd> <div> <dl>
+                <dt> <em> <embed> <h1> <h2> <h3> <h4> <h5> <h6> <head> <hr> <i>
+                <img> <li> <listing> <menu> <meta> <nobr> <ol> <p> <pre> <ruby>
+                <s> <small> <span> <strong> <strike> <sub> <sup> <table> <tt>
+                <u> <ul> <var> => self.unexpected_start_tag_in_foreign_content(tag),
+
+            tag @ <font> => {
+                // <font> is treated like the start tags listed above only
+                // when it carries a `color`, `face` or `size` attribute.
+                let has_html_font_attr = tag.attrs.iter().any(|attr| {
+                    matches!(attr.name.expanded(),
+                             expanded_name!("", "color") |
+                             expanded_name!("", "face") |
+                             expanded_name!("", "size"))
+                });
+                if !has_html_font_attr {
+                    self.foreign_start_tag(tag)
+                } else {
+                    self.unexpected_start_tag_in_foreign_content(tag)
+                }
+            }
+
+            tag @ <_> => self.foreign_start_tag(tag),
+
+            // FIXME(#118): </script> in SVG
+
+            tag @ </_> => {
+                // Walk the stack of open elements from the current node
+                // downwards. The bottom entry (index 0) is never examined.
+                let mut is_first = true;
+                let mut idx = self.open_elems.len() - 1;
+                while idx != 0 {
+                    let (is_html, names_match) = {
+                        let name = self.sink.elem_name(&self.open_elems[idx]);
+                        (
+                            *name.ns == ns!(html),
+                            name.local.eq_ignore_ascii_case(&tag.name),
+                        )
+                    };
+
+                    // Once an HTML element is reached below the current
+                    // node, the token is handled by the current insertion
+                    // mode instead.
+                    if is_html && !is_first {
+                        let mode = self.mode;
+                        return self.step(mode, TagToken(tag));
+                    }
+
+                    // Matching element found: drop it and everything above it.
+                    if names_match {
+                        self.open_elems.truncate(idx);
+                        return Done;
+                    }
+
+                    // The current node did not match the end tag; report
+                    // that once.
+                    if is_first {
+                        self.unexpected(&tag);
+                        is_first = false;
+                    }
+                    idx -= 1;
+                }
+                Done
+            }
+
+            // FIXME: This should be unreachable, but match_token requires a
+            // catch-all case.
+            _ => panic!("impossible case in foreign content"),
+        })
+    }
+}
diff --git a/src/tree_builder/tag_sets.rs b/src/tree_builder/tag_sets.rs
new file mode 100644
index 0000000..377b34c
--- /dev/null
+++ b/src/tree_builder/tag_sets.rs
@@ -0,0 +1,115 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Various sets of HTML tag names, and macros for declaring them.
+
+use crate::ExpandedName;
+use mac::{_tt_as_expr_hack, matches};
+use markup5ever::{expanded_name, local_name, namespace_prefix, namespace_url, ns};
+
+/// Expands to the body of a tag-set predicate: a `match` on `$param` that
+/// yields `$b` for every listed tag name in the HTML namespace and passes any
+/// other name on to the fallback predicate `$supr`.
+macro_rules! declare_tag_set_impl ( ($param:ident, $b:ident, $supr:ident, $($tag:tt)+) => (
+ match $param {
+ $( expanded_name!(html $tag) => $b, )+
+ p => $supr(p),
+ }
+));
+
+/// Translates the three set notations accepted by `declare_tag_set!` into a
+/// `declare_tag_set_impl!` invocation.
+macro_rules! declare_tag_set_body (
+ // `[base] - "a" "b"`: the listed tags are excluded (map to `false`);
+ // every other name is answered by `base`.
+ ($param:ident = [$supr:ident] - $($tag:tt)+)
+ => ( declare_tag_set_impl!($param, false, $supr, $($tag)+) );
+
+ // `[base] + "a" "b"`: the listed tags are included (map to `true`);
+ // every other name is answered by `base`.
+ ($param:ident = [$supr:ident] + $($tag:tt)+)
+ => ( declare_tag_set_impl!($param, true, $supr, $($tag)+) );
+
+ // `"a" "b"`: exactly the listed tags; the fallback is `empty_set`.
+ ($param:ident = $($tag:tt)+)
+ => ( declare_tag_set_impl!($param, true, empty_set, $($tag)+) );
+);
+
+/// Declares a tag set as a predicate function
+/// `fn name(p: crate::ExpandedName) -> bool`.
+///
+/// The tokens after `=` use one of the notations understood by
+/// `declare_tag_set_body!`: a plain list of tag names, or `[base] + tags` /
+/// `[base] - tags` to extend or restrict an existing predicate.
+macro_rules! declare_tag_set (
+ // Public variant: `declare_tag_set!(pub name = ...)`.
+ (pub $name:ident = $($toks:tt)+) => (
+ pub fn $name(p: crate::ExpandedName) -> bool {
+ declare_tag_set_body!(p = $($toks)+)
+ }
+ );
+
+ // Private variant: `declare_tag_set!(name = ...)`.
+ ($name:ident = $($toks:tt)+) => (
+ fn $name(p: crate::ExpandedName) -> bool {
+ declare_tag_set_body!(p = $($toks)+)
+ }
+ );
+);
+
+/// The tag set that contains no names: returns `false` for every input.
+/// Used as the fallback for tag sets declared as a plain list of names.
+#[inline(always)]
+pub fn empty_set(_: ExpandedName) -> bool {
+ false
+}
+/// The tag set that contains every name: returns `true` for every input.
+/// Serves as the base of sets declared by exclusion, such as `select_scope`.
+#[inline(always)]
+pub fn full_set(_: ExpandedName) -> bool {
+ true
+}
+
+// The HTML-namespace part of the default scope; `default_scope` below adds
+// the MathML and SVG integration points to it.
+declare_tag_set!(pub html_default_scope =
+ "applet" "caption" "html" "table" "td" "th" "marquee" "object" "template");
+
+/// Returns `true` if `name` belongs to the default scope set: one of the HTML
+/// elements in `html_default_scope`, a MathML text integration point, or an
+/// SVG HTML integration point.
+#[inline(always)]
+pub fn default_scope(name: ExpandedName) -> bool {
+    if html_default_scope(name) {
+        return true;
+    }
+    if mathml_text_integration_point(name) {
+        return true;
+    }
+    svg_html_integration_point(name)
+}
+
+// Scope sets derived from `default_scope`: list item scope additionally
+// contains <ol> and <ul>, button scope additionally contains <button>.
+declare_tag_set!(pub list_item_scope = [default_scope] + "ol" "ul");
+declare_tag_set!(pub button_scope = [default_scope] + "button");
+// Table scope is a stand-alone set and does not build on `default_scope`.
+declare_tag_set!(pub table_scope = "html" "table" "template");
+// Select scope is declared by exclusion: every name except <optgroup> and
+// <option> is a member.
+declare_tag_set!(pub select_scope = [full_set] - "optgroup" "option");
+
+// Sets passed to `pop_until_current` by the table-body and row rules, plus
+// the set of table cell elements.
+declare_tag_set!(pub table_body_context = "tbody" "tfoot" "thead" "template" "html");
+declare_tag_set!(pub table_row_context = "tr" "template" "html");
+declare_tag_set!(pub td_th = "td" "th");
+
+// Elements passed to `generate_implied_end` by the tree builder rules, i.e.
+// the ones whose end tags may be implied.
+declare_tag_set!(pub cursory_implied_end =
+ "dd" "dt" "li" "option" "optgroup" "p" "rb" "rp" "rt" "rtc");
+
+// The cursory set extended with the table-related elements.
+declare_tag_set!(pub thorough_implied_end = [cursory_implied_end]
+ + "caption" "colgroup" "tbody" "td" "tfoot" "th" "thead" "tr");
+
+// The six heading elements.
+declare_tag_set!(pub heading_tag = "h1" "h2" "h3" "h4" "h5" "h6");
+
+// HTML elements that get special treatment from the tree builder; the
+// <li>/<dd>/<dt> rule, for example, derives its `extra_special` set from it.
+// NOTE(review): this appears to mirror the "special" category of the HTML
+// parsing specification. The current specification no longer lists "isindex"
+// and does list "keygen" -- confirm before changing the set.
+declare_tag_set!(pub special_tag =
+ "address" "applet" "area" "article" "aside" "base" "basefont" "bgsound" "blockquote" "body"
+ "br" "button" "caption" "center" "col" "colgroup" "dd" "details" "dir" "div" "dl" "dt" "embed"
+ "fieldset" "figcaption" "figure" "footer" "form" "frame" "frameset" "h1" "h2" "h3" "h4" "h5"
+ "h6" "head" "header" "hgroup" "hr" "html" "iframe" "img" "input" "isindex" "li" "link"
+ "listing" "main" "marquee" "menu" "meta" "nav" "noembed" "noframes" "noscript"
+ "object" "ol" "p" "param" "plaintext" "pre" "script" "section" "select" "source" "style"
+ "summary" "table" "tbody" "td" "template" "textarea" "tfoot" "th" "thead" "title" "tr" "track"
+ "ul" "wbr" "xmp");
+//§ END
+
+/// Returns `true` if `p` names one of the MathML elements `mi`, `mo`, `mn`,
+/// `ms` or `mtext`.
+///
+/// https://html.spec.whatwg.org/multipage/#mathml-text-integration-point
+pub fn mathml_text_integration_point(p: ExpandedName) -> bool {
+ matches!(
+ p,
+ expanded_name!(mathml "mi") |
+ expanded_name!(mathml "mo") |
+ expanded_name!(mathml "mn") |
+ expanded_name!(mathml "ms") |
+ expanded_name!(mathml "mtext")
+ )
+}
+
+/// Returns `true` if `p` names one of the SVG elements `foreignObject`,
+/// `desc` or `title`.
+///
+/// https://html.spec.whatwg.org/multipage/#html-integration-point
+pub fn svg_html_integration_point(p: ExpandedName) -> bool {
+ // MathML `annotation-xml` is not covered here; it is handled elsewhere.
+ matches!(
+ p,
+ expanded_name!(svg "foreignObject") |
+ expanded_name!(svg "desc") |
+ expanded_name!(svg "title")
+ )
+}
diff --git a/src/tree_builder/types.rs b/src/tree_builder/types.rs
new file mode 100644
index 0000000..e47d69b
--- /dev/null
+++ b/src/tree_builder/types.rs
@@ -0,0 +1,95 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Types used within the tree builder code. Not exported to users.
+
+use crate::tokenizer::states::RawKind;
+use crate::tokenizer::Tag;
+
+use crate::tendril::StrTendril;
+
+pub use self::FormatEntry::*;
+pub use self::InsertionMode::*;
+pub use self::InsertionPoint::*;
+pub use self::ProcessResult::*;
+pub use self::SplitStatus::*;
+pub use self::Token::*;
+
+#[derive(PartialEq, Eq, Copy, Clone, Debug)]
+pub enum InsertionMode { // NOTE(review): variant names appear to mirror the HTML spec's tree-construction insertion modes; confirm
+ Initial,
+ BeforeHtml,
+ BeforeHead,
+ InHead,
+ InHeadNoscript,
+ AfterHead,
+ InBody,
+ Text,
+ InTable,
+ InTableText,
+ InCaption,
+ InColumnGroup,
+ InTableBody,
+ InRow,
+ InCell,
+ InSelect,
+ InSelectInTable,
+ InTemplate,
+ AfterBody,
+ InFrameset,
+ AfterFrameset,
+ AfterAfterBody,
+ AfterAfterFrameset,
+}
+
+#[derive(PartialEq, Eq, Copy, Clone, Debug)]
+pub enum SplitStatus { // carried by `Token::CharacterTokens`; presumably records whether the text was split by whitespace (TODO confirm at use sites)
+ NotSplit,
+ Whitespace,
+ NotWhitespace,
+}
+
+/// A subset/refinement of `tokenizer::Token`. Everything else is handled
+/// specially at the beginning of `process_token`.
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub enum Token {
+ TagToken(Tag), // wraps a tokenizer `Tag`
+ CommentToken(StrTendril), // comment text
+ CharacterTokens(SplitStatus, StrTendril), // text together with its `SplitStatus`
+ NullCharacterToken,
+ EOFToken,
+}
+
+pub enum ProcessResult<Handle> { // NOTE(review): presumably the outcome of handling one token; the consumer is outside this chunk
+ Done,
+ DoneAckSelfClosing,
+ SplitWhitespace(StrTendril),
+ Reprocess(InsertionMode, Token), // carries a mode and a token; presumably "handle this token again in that mode" (confirm)
+ ReprocessForeign(Token),
+ Script(Handle),
+ ToPlaintext,
+ ToRawData(RawKind), // carries a tokenizer `RawKind`
+}
+
+pub enum FormatEntry<Handle> { // NOTE(review): presumably an entry in the list of active formatting elements; confirm at use sites
+ Element(Handle, Tag), // a node handle paired with a `Tag`
+ Marker,
+}
+
+pub enum InsertionPoint<Handle> { // where a node is to be inserted, expressed relative to existing handles
+ /// Insert as last child in this parent.
+ LastChild(Handle),
+ /// Insert before this following sibling.
+ BeforeSibling(Handle),
+ /// Insertion point is decided based on existence of element's parent node.
+ TableFosterParenting {
+ element: Handle,
+ prev_element: Handle,
+ },
+}