import https://github.com/servo/html5ever

commit d1206daa740305f55a5fa159e43eb33afc359cb4
author: Martin Fischer <martin@push-f.com> 2021-04-08 08:42:01 +0200
committer: Martin Fischer <martin@push-f.com> 2021-04-08 15:40:37 +0200
commit: 57e7eefcbe6fb8c3dc4b01c707be9de4c34963a7 (patch)
tree: 6a9d296389bf3023396592c8514ed6712e011c7f /src
14 files changed, 6392 insertions, 0 deletions
diff --git a/src/driver.rs b/src/driver.rs
new file mode 100644
index 0000000..26db9b8
--- /dev/null
+++ b/src/driver.rs
@@ -0,0 +1,137 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! High-level interface to the parser.
+
+use crate::buffer_queue::BufferQueue;
+use crate::tokenizer::{Tokenizer, TokenizerOpts, TokenizerResult};
+use crate::tree_builder::{create_element, TreeBuilder, TreeBuilderOpts, TreeSink};
+use crate::{Attribute, QualName};
+
+use std::borrow::Cow;
+
+use crate::tendril;
+use crate::tendril::stream::{TendrilSink, Utf8LossyDecoder};
+use crate::tendril::StrTendril;
+
+/// Options for a whole parse: one set for the tokenizer, one for the tree builder.
+#[derive(Clone, Default)]
+pub struct ParseOpts {
+    /// Tokenizer options, passed to `Tokenizer::new` by the `parse_*` functions.
+    pub tokenizer: TokenizerOpts,
+
+    /// Tree builder options, passed to the `TreeBuilder` constructors.
+    pub tree_builder: TreeBuilderOpts,
+}
+
+/// Create a `Parser` for a complete HTML document.
+///
+/// The result implements `tendril::TendrilSink`, which means Unicode
+/// input can be pushed in piece by piece, or handed over in a single
+/// call to the `one` method.
+///
+/// For byte input, wrap the result with `Parser::from_utf8`.
+pub fn parse_document<Sink>(sink: Sink, opts: ParseOpts) -> Parser<Sink>
+where
+    Sink: TreeSink,
+{
+    let ParseOpts { tokenizer, tree_builder } = opts;
+    let builder = TreeBuilder::new(sink, tree_builder); // the tokenizer is built around this
+    Parser {
+        tokenizer: Tokenizer::new(builder, tokenizer),
+        input_buffer: BufferQueue::new(),
+    }
+}
+
+/// Create a `Parser` for an HTML fragment.
+///
+/// A context element is created in `sink` from `context_name` and
+/// `context_attrs`, then parsing is set up as in
+/// `parse_fragment_for_element` without a form element.
+///
+/// The result implements `tendril::TendrilSink`; for bytes, use `Parser::from_utf8`.
+pub fn parse_fragment<Sink>(
+    mut sink: Sink,
+    opts: ParseOpts,
+    context_name: QualName,
+    context_attrs: Vec<Attribute>,
+) -> Parser<Sink>
+where
+    Sink: TreeSink,
+{
+    let context = create_element(&mut sink, context_name, context_attrs);
+    parse_fragment_for_element(sink, opts, context, None)
+}
+
+/// Fragment parsing against a context element that already exists in `sink`,
+/// with an optional form element handed to the tree builder.
+pub fn parse_fragment_for_element<Sink>(
+    sink: Sink,
+    opts: ParseOpts,
+    context_element: Sink::Handle,
+    form_element: Option<Sink::Handle>,
+) -> Parser<Sink>
+where
+    Sink: TreeSink,
+{
+    let ParseOpts { tokenizer: base_opts, tree_builder } = opts;
+    let builder = TreeBuilder::new_for_fragment(sink, context_element, form_element, tree_builder);
+    // The initial tokenizer state is supplied by the tree builder for its
+    // context element; every other tokenizer option comes from the caller.
+    let initial_state = Some(builder.tokenizer_state_for_context_elem());
+    let tokenizer = Tokenizer::new(builder, TokenizerOpts { initial_state, ..base_opts });
+    Parser {
+        tokenizer,
+        input_buffer: BufferQueue::new(),
+    }
+}
+
+/// An HTML parser,
+/// ready to receive Unicode input through the `tendril::TendrilSink` trait’s methods.
+pub struct Parser<Sink>
+where
+    Sink: TreeSink,
+{
+    pub tokenizer: Tokenizer<TreeBuilder<Sink::Handle, Sink>>, // tokenizer wrapping the tree builder
+    pub input_buffer: BufferQueue, // input queued by `process`, not yet consumed
+}
+
+impl<Sink: TreeSink> TendrilSink<tendril::fmt::UTF8> for Parser<Sink> {
+    fn process(&mut self, t: StrTendril) {
+        self.input_buffer.push_back(t); // append the chunk to whatever input is still queued
+        // FIXME: Properly support </script> somehow. Until then `Script` results are skipped.
+        while let TokenizerResult::Script(_) = self.tokenizer.feed(&mut self.input_buffer) {}
+    }
+
+    // FIXME: Is it too noisy to report every character decoding error?
+    fn error(&mut self, desc: Cow<'static, str>) {
+        self.tokenizer.sink.sink.parse_error(desc) // forwarded to the `TreeSink` as a parse error
+    }
+
+    type Output = Sink::Output;
+
+    fn finish(mut self) -> Self::Output {
+        // FIXME: Properly support </script> somehow. Until then `Script` results are skipped.
+        while let TokenizerResult::Script(_) = self.tokenizer.feed(&mut self.input_buffer) {}
+        assert!(self.input_buffer.is_empty()); // panics if the loop above left input queued
+        self.tokenizer.end();
+        self.tokenizer.sink.sink.finish() // the output is whatever the `TreeSink` produces
+    }
+}
+
+impl<Sink: TreeSink> Parser<Sink> {
+    /// Consume this parser and wrap it in a `Utf8LossyDecoder`, a sink fed with UTF-8 bytes.
+    ///
+    /// Use this when your input is bytes that are known to be in the UTF-8 encoding.
+    /// Decoding is lossy, like `String::from_utf8_lossy`.
+    #[allow(clippy::wrong_self_convention)] // `from_*` takes `self` here instead of constructing
+    pub fn from_utf8(self) -> Utf8LossyDecoder<Self> {
+        Utf8LossyDecoder::new(self)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..65fadaa
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,30 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![crate_name = "html5ever"]
+#![crate_type = "dylib"]
+#![cfg_attr(test, deny(warnings))]
+#![allow(unused_parens)]
+
+pub use driver::{parse_document, parse_fragment, ParseOpts, Parser};
+pub use markup5ever::*;
+
+pub use serialize::serialize;
+
+#[macro_use]
+mod macros;
+
+mod util {
+    pub mod str;
+}
+
+pub mod driver;
+pub mod serialize;
+pub mod tokenizer;
+pub mod tree_builder;
diff --git a/src/macros.rs b/src/macros.rs
new file mode 100644
index 0000000..558a4a9
--- /dev/null
+++ b/src/macros.rs
@@ -0,0 +1,33 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+macro_rules! unwrap_or_else {
+    // Evaluates to the value inside `$opt`.
+    // When `$opt` is `None`, `$else_block` is evaluated instead; the block
+    // may diverge (for example with `return`), as `unwrap_or_return!` relies on.
+    ($opt:expr, $else_block:block) => {
+        match $opt { Some(value) => value, None => $else_block }
+    };
+}
+
+macro_rules! unwrap_or_return { // unwrap `$opt`; on `None`, return `$retval` from the caller
+    ($opt:expr, $retval:expr) => {
+        unwrap_or_else!($opt, { return $retval }) // the `return` leaves the enclosing function
+    };
+}
+
+macro_rules! time {
+    // Evaluates `$e` and yields `(value, nanoseconds)`, timing only the evaluation of `$e`.
+    ($e:expr) => {{
+        let started = ::std::time::Instant::now();
+        let value = $e;
+        let spent = started.elapsed();
+        (value, spent.as_secs() * 1_000_000_000 + u64::from(spent.subsec_nanos()))
+    }};
+}
diff --git a/src/serialize/mod.rs b/src/serialize/mod.rs
new file mode 100644
index 0000000..3a57b47
--- /dev/null
+++ b/src/serialize/mod.rs
@@ -0,0 +1,256 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use log::warn;
+pub use markup5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope};
+use markup5ever::{local_name, namespace_url, ns};
+use std::default::Default;
+use std::io::{self, Write};
+
+use crate::{LocalName, QualName};
+
+/// Serialize `node` as HTML into `writer`; `opts.traversal_scope` is forwarded to the node.
+pub fn serialize<Wr, T>(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()>
+where
+    Wr: Write,
+    T: Serialize,
+{
+    node.serialize(&mut HtmlSerializer::new(writer, opts.clone()), opts.traversal_scope)
+}
+
+#[derive(Clone)]
+pub struct SerializeOpts {
+    /// Is scripting enabled? When true, `<noscript>` text is written unescaped. Default: true
+    pub scripting_enabled: bool,
+
+    /// Serialize the root node? Default: `ChildrenOnly(None)`
+    pub traversal_scope: TraversalScope,
+
+    /// If the serializer is asked to serialize an invalid tree, the default
+    /// behavior is to panic in the event that an `end_elem` is created without a
+    /// matching `start_elem`. Setting this to true will prevent those panics by
+    /// creating a default parent on the element stack. No extra start elem will
+    /// actually be written. Default: false
+    pub create_missing_parent: bool,
+}
+
+impl Default for SerializeOpts {
+    // Defaults: scripting enabled, children-only traversal without a named
+    // parent, and a panic (rather than a synthesized parent) on unbalanced trees.
+    fn default() -> Self {
+        let traversal_scope = TraversalScope::ChildrenOnly(None);
+        let (scripting_enabled, create_missing_parent) = (true, false);
+        Self { scripting_enabled, traversal_scope, create_missing_parent }
+    }
+}
+
+#[derive(Default)]
+struct ElemInfo { // bookkeeping for one entry of the serializer's element stack
+    html_name: Option<LocalName>, // local name; `start_elem` records it only for HTML elements
+    ignore_children: bool // when true, nested `start_elem` calls and the end tag write nothing
+}
+
+pub struct HtmlSerializer<Wr: Write> { // `Serializer` implementation that emits HTML text
+    pub writer: Wr, // destination of the serialized bytes
+    opts: SerializeOpts, // options given to `new`
+    stack: Vec<ElemInfo>, // open elements, innermost last; `new` seeds it with one entry
+}
+
+/// Local name to write for `name`. Namespaces other than HTML, MathML and SVG
+/// are logged with `warn!`; the local name is returned either way.
+fn tagname(name: &QualName) -> LocalName {
+    let QualName { ns, local, .. } = name;
+    match *ns {
+        ns!(html) | ns!(mathml) | ns!(svg) => {},
+        // FIXME(#122)
+        _ => warn!("node with weird namespace {:?}", ns),
+    }
+    local.clone()
+}
+
+impl<Wr: Write> HtmlSerializer<Wr> {
+    /// Create a serializer writing to `writer`. The element stack starts with one entry
+    /// for the implied parent, which is named only for `ChildrenOnly(Some(name))`.
+    pub fn new(writer: Wr, opts: SerializeOpts) -> Self {
+        let html_name = match opts.traversal_scope {
+            TraversalScope::IncludeNode | TraversalScope::ChildrenOnly(None) => None,
+            TraversalScope::ChildrenOnly(Some(ref n)) => Some(tagname(n)),
+        };
+        HtmlSerializer {
+            writer,
+            opts,
+            stack: vec![ElemInfo { html_name, ignore_children: false }],
+        }
+    }
+
+    /// Innermost open element. With an empty stack this pushes a default entry when
+    /// `create_missing_parent` is set, and panics with "no parent ElemInfo" otherwise.
+    fn parent(&mut self) -> &mut ElemInfo {
+        if self.stack.is_empty() {
+            assert!(self.opts.create_missing_parent, "no parent ElemInfo");
+            warn!("ElemInfo stack empty, creating new parent");
+            self.stack.push(Default::default());
+        }
+        self.stack.last_mut().unwrap()
+    }
+
+    /// Write `text`, escaping `&` and U+00A0 always, `"` only in attribute mode, and
+    /// `<` / `>` only outside attribute mode. Stops at the first write error.
+    fn write_escaped(&mut self, text: &str, attr_mode: bool) -> io::Result<()> {
+        for c in text.chars() {
+            match c {
+                '&' => self.writer.write_all(b"&amp;"),
+                '\u{00A0}' => self.writer.write_all(b"&nbsp;"),
+                '"' if attr_mode => self.writer.write_all(b"&quot;"),
+                '<' if !attr_mode => self.writer.write_all(b"&lt;"),
+                '>' if !attr_mode => self.writer.write_all(b"&gt;"),
+                c => self.writer.write_all(c.encode_utf8(&mut [0; 4]).as_bytes()),
+            }?;
+        }
+        Ok(())
+    }
+}
+
+impl<Wr: Write> Serializer for HtmlSerializer<Wr> {
+    fn start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()>
+    where
+        AttrIter: Iterator<Item = AttrRef<'a>>,
+    { // Writes the start tag with its attributes and pushes an `ElemInfo` for the element.
+        let html_name = match name.ns {
+            ns!(html) => Some(name.local.clone()),
+            _ => None, // only HTML elements are tracked by name
+        };
+
+        if self.parent().ignore_children {
+            self.stack.push(ElemInfo { // still pushed so the matching `end_elem` has an entry to pop
+                html_name,
+                ignore_children: true, // propagates to everything nested below
+            });
+            return Ok(()); // nothing is written for this element
+        }
+
+        self.writer.write_all(b"<")?;
+        self.writer.write_all(tagname(&name).as_bytes())?;
+        for (name, value) in attrs { // `name` shadows the element name inside this loop
+            self.writer.write_all(b" ")?;
+
+            match name.ns { // pick the prefix to write in front of the attribute's local name
+                ns!() => (),
+                ns!(xml) => self.writer.write_all(b"xml:")?,
+                ns!(xmlns) => {
+                    if name.local != local_name!("xmlns") { // a bare `xmlns` gets no prefix
+                        self.writer.write_all(b"xmlns:")?;
+                    }
+                },
+                ns!(xlink) => self.writer.write_all(b"xlink:")?,
+                ref ns => {
+                    // FIXME(#122)
+                    warn!("attr with weird namespace {:?}", ns);
+                    self.writer.write_all(b"unknown_namespace:")?;
+                },
+            }
+
+            self.writer.write_all(name.local.as_bytes())?;
+            self.writer.write_all(b"=\"")?;
+            self.write_escaped(value, true)?; // attribute mode: `"` is escaped, `<` and `>` are not
+            self.writer.write_all(b"\"")?;
+        }
+        self.writer.write_all(b">")?;
+
+        let ignore_children = name.ns == ns!(html) && // these HTML elements get no children or end tag
+            match name.local {
+                local_name!("area") |
+                local_name!("base") |
+                local_name!("basefont") |
+                local_name!("bgsound") |
+                local_name!("br") |
+                local_name!("col") |
+                local_name!("embed") |
+                local_name!("frame") |
+                local_name!("hr") |
+                local_name!("img") |
+                local_name!("input") |
+                local_name!("keygen") |
+                local_name!("link") |
+                local_name!("meta") |
+                local_name!("param") |
+                local_name!("source") |
+                local_name!("track") |
+                local_name!("wbr") => true,
+                _ => false,
+            };
+
+        self.stack.push(ElemInfo { // popped again by the matching `end_elem`
+            html_name,
+            ignore_children,
+        });
+
+        Ok(())
+    }
+
+    /// Pop the innermost element and write its end tag, unless the element was marked
+    /// `ignore_children`, in which case nothing is written.
+    fn end_elem(&mut self, name: QualName) -> io::Result<()> {
+        let closed = self.stack.pop().unwrap_or_else(|| {
+            // An unbalanced `end_elem` is only tolerated when the caller opted in.
+            assert!(self.opts.create_missing_parent, "no ElemInfo");
+            warn!("missing ElemInfo, creating default.");
+            ElemInfo::default()
+        });
+        if closed.ignore_children {
+            return Ok(());
+        }
+
+        self.writer.write_all(b"</")?;
+        self.writer.write_all(tagname(&name).as_bytes())?;
+        self.writer.write_all(b">")
+    }
+
+    /// Write a text node. Text whose parent is one of the elements listed below is
+    /// written verbatim; anything else goes through `write_escaped` in text mode.
+    fn write_text(&mut self, text: &str) -> io::Result<()> {
+        let verbatim = match self.parent().html_name {
+            Some(local_name!("style")) |
+            Some(local_name!("script")) |
+            Some(local_name!("xmp")) |
+            Some(local_name!("iframe")) |
+            Some(local_name!("noembed")) |
+            Some(local_name!("noframes")) |
+            Some(local_name!("plaintext")) => true,
+            // `<noscript>` text is written verbatim only while scripting is enabled.
+            Some(local_name!("noscript")) => self.opts.scripting_enabled,
+            _ => false,
+        };
+        if verbatim {
+            self.writer.write_all(text.as_bytes())
+        } else {
+            self.write_escaped(text, false)
+        }
+    }
+
+    fn write_comment(&mut self, text: &str) -> io::Result<()> { // emits `<!--text-->`
+        self.writer.write_all(b"<!--")?;
+        self.writer.write_all(text.as_bytes())?; // written as-is, no escaping
+        self.writer.write_all(b"-->")
+    }
+
+    fn write_doctype(&mut self, name: &str) -> io::Result<()> { // emits `<!DOCTYPE name>`
+        self.writer.write_all(b"<!DOCTYPE ")?;
+        self.writer.write_all(name.as_bytes())?; // only the name is written, as-is
+        self.writer.write_all(b">")
+    }
+
+    fn write_processing_instruction(&mut self, target: &str, data: &str) -> io::Result<()> {
+        self.writer.write_all(b"<?")?; // emits `<?target data>`
+        self.writer.write_all(target.as_bytes())?;
+        self.writer.write_all(b" ")?;
+        self.writer.write_all(data.as_bytes())?; // target and data are written as-is
+        self.writer.write_all(b">") // closed with `>` alone, not `?>`
+    }
+}
diff --git a/src/tokenizer/char_ref/mod.rs b/src/tokenizer/char_ref/mod.rs
new file mode 100644
index 0000000..a52485d
--- /dev/null
+++ b/src/tokenizer/char_ref/mod.rs
@@ -0,0 +1,449 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use super::{TokenSink, Tokenizer};
+use crate::buffer_queue::BufferQueue;
+use crate::data;
+use crate::tendril::StrTendril;
+use crate::util::str::is_ascii_alnum;
+
+use log::debug;
+use mac::format_if;
+use std::borrow::Cow::Borrowed;
+use std::char::from_u32;
+
+use self::State::*;
+pub use self::Status::*;
+
+//§ tokenizing-character-references
+pub struct CharRef { // outcome of tokenizing one character reference
+    /// The resulting character(s); slots that are not in use hold `'\0'`.
+    pub chars: [char; 2],
+
+    /// Number of leading slots in `chars` that are valid (0, 1 or 2).
+    pub num_chars: u8,
+}
+
+pub enum Status { // what a call to `CharRefTokenizer::step` achieved
+    Stuck, // no input character was available to look at
+    Progress, // the state machine advanced but has no result yet
+    Done, // a result is stored and can be taken with `get_result`
+}
+
+#[derive(Debug)]
+enum State {
+    Begin, // nothing of the reference has been examined yet
+    Octothorpe, // `#` was consumed; an `x` / `X` may follow
+    Numeric(u32), // base (10 or 16) of the digits being read
+    NumericSemicolon, // digits are finished; a `;` is expected next
+    Named, // matching the name against `data::NAMED_ENTITIES`
+    BogusName, // no entity matched; scanning on to see whether a `;` follows
+}
+
+pub struct CharRefTokenizer { // sub-tokenizer for a single character reference
+    state: State, // current state of the machine driven by `step`
+    addnl_allowed: Option<char>, // extra char that ends the reference at once; see note on `new`
+    result: Option<CharRef>, // set exactly when tokenizing has finished
+
+    num: u32, // value accumulated from the digits (wrapping arithmetic)
+    num_too_big: bool, // sticky: set once `num` exceeded 0x10FFFF
+    seen_digit: bool, // whether at least one digit has been consumed
+    hex_marker: Option<char>, // the `x` / `X` consumed after `#`, kept so it can be put back
+
+    name_buf_opt: Option<StrTendril>, // characters consumed for a named reference
+    name_match: Option<(u32, u32)>, // code points of the latest full match; second is 0 if unused
+    name_len: usize, // byte length of the name buffer when `name_match` was recorded
+}
+
+impl CharRefTokenizer {
+    // NB: We assume that we have an additional allowed character iff we're
+    // tokenizing in an attribute value.
+    pub fn new(addnl_allowed: Option<char>) -> CharRefTokenizer {
+        CharRefTokenizer {
+            state: Begin,
+            addnl_allowed,
+            result: None,
+            num: 0,
+            num_too_big: false,
+            seen_digit: false,
+            hex_marker: None,
+            name_buf_opt: None, // created by `do_begin` when a named reference starts
+            name_match: None,
+            name_len: 0,
+        }
+    }
+
+    // A CharRefTokenizer can only tokenize one character reference,
+    // so this method consumes the tokenizer.
+    pub fn get_result(self) -> CharRef {
+        self.result.expect("get_result called before done") // panics unless a step returned `Done`
+    }
+
+    fn name_buf(&self) -> &StrTendril { // shared access to the name buffer; panics if it is absent
+        self.name_buf_opt
+            .as_ref()
+            .expect("name_buf missing in named character reference")
+    }
+
+    fn name_buf_mut(&mut self) -> &mut StrTendril { // mutable counterpart of `name_buf`
+        self.name_buf_opt
+            .as_mut()
+            .expect("name_buf missing in named character reference")
+    }
+
+    fn finish_none(&mut self) -> Status { // finish with an empty result (no character produced)
+        self.result = Some(CharRef {
+            chars: ['\0', '\0'],
+            num_chars: 0,
+        });
+        Done
+    }
+
+    fn finish_one(&mut self, c: char) -> Status { // finish with the single character `c`
+        self.result = Some(CharRef {
+            chars: [c, '\0'],
+            num_chars: 1,
+        });
+        Done
+    }
+}
+
+impl CharRefTokenizer {
+    pub fn step<Sink: TokenSink>(
+        &mut self,
+        tokenizer: &mut Tokenizer<Sink>,
+        input: &mut BufferQueue,
+    ) -> Status { // runs one transition of the state machine
+        if self.result.is_some() {
+            return Done; // already finished: stepping again changes nothing
+        }
+
+        debug!("char ref tokenizer stepping in state {:?}", self.state);
+        match self.state { // dispatch to the handler for the current state
+            Begin => self.do_begin(tokenizer, input),
+            Octothorpe => self.do_octothorpe(tokenizer, input),
+            Numeric(base) => self.do_numeric(tokenizer, input, base),
+            NumericSemicolon => self.do_numeric_semicolon(tokenizer, input),
+            Named => self.do_named(tokenizer, input),
+            BogusName => self.do_bogus_name(tokenizer, input),
+        }
+    }
+
+    fn do_begin<Sink: TokenSink>(
+        &mut self,
+        tokenizer: &mut Tokenizer<Sink>,
+        input: &mut BufferQueue,
+    ) -> Status { // decides between "no reference", numeric and named from the first character
+        match unwrap_or_return!(tokenizer.peek(input), Stuck) { // `Stuck` when nothing to peek at
+            '\t' | '\n' | '\x0C' | ' ' | '<' | '&' => self.finish_none(), // left in the input
+            c if Some(c) == self.addnl_allowed => self.finish_none(),
+
+            '#' => {
+                tokenizer.discard_char(input); // consume the `#`
+                self.state = Octothorpe;
+                Progress
+            },
+
+            _ => {
+                self.state = Named; // the character itself is consumed later by `do_named`
+                self.name_buf_opt = Some(StrTendril::new());
+                Progress
+            },
+        }
+    }
+
+    fn do_octothorpe<Sink: TokenSink>(
+        &mut self,
+        tokenizer: &mut Tokenizer<Sink>,
+        input: &mut BufferQueue,
+    ) -> Status { // after `#`: choose hexadecimal or decimal
+        let c = unwrap_or_return!(tokenizer.peek(input), Stuck);
+        match c {
+            'x' | 'X' => {
+                tokenizer.discard_char(input);
+                self.hex_marker = Some(c); // remembered so `unconsume_numeric` can put it back
+                self.state = Numeric(16);
+            },
+
+            _ => {
+                self.hex_marker = None; // the peeked character stays in the input
+                self.state = Numeric(10);
+            },
+        }
+        Progress
+    }
+
+    fn do_numeric<Sink: TokenSink>(
+        &mut self,
+        tokenizer: &mut Tokenizer<Sink>,
+        input: &mut BufferQueue,
+        base: u32,
+    ) -> Status { // consumes at most one digit of the numeric reference per call
+        let c = unwrap_or_return!(tokenizer.peek(input), Stuck);
+        match c.to_digit(base) {
+            Some(n) => {
+                tokenizer.discard_char(input);
+                self.num = self.num.wrapping_mul(base); // shift the value by one digit
+                if self.num > 0x10FFFF {
+                    // We might overflow, and the character is definitely invalid.
+                    // We still parse digits and semicolon, but don't use the result.
+                    self.num_too_big = true;
+                }
+                self.num = self.num.wrapping_add(n);
+                self.seen_digit = true;
+                Progress
+            },
+
+            None if !self.seen_digit => self.unconsume_numeric(tokenizer, input), // no digits at all
+
+            None => {
+                self.state = NumericSemicolon; // non-digit after digits; it is not consumed here
+                Progress
+            },
+        }
+    }
+
+    fn do_numeric_semicolon<Sink: TokenSink>(
+        &mut self,
+        tokenizer: &mut Tokenizer<Sink>,
+        input: &mut BufferQueue,
+    ) -> Status { // consumes the terminating `;`, or reports that it is missing
+        match unwrap_or_return!(tokenizer.peek(input), Stuck) {
+            ';' => tokenizer.discard_char(input),
+            _ => tokenizer.emit_error(Borrowed( // the other character is left in the input
+                "Semicolon missing after numeric character reference",
+            )),
+        };
+        self.finish_numeric(tokenizer) // the reference is finished in both cases
+    }
+
+    /// Put back the consumed `#` (and the `x` / `X`, if one was consumed), report the
+    /// missing digits as a parse error, and finish without producing a character.
+    fn unconsume_numeric<Sink: TokenSink>(
+        &mut self,
+        tokenizer: &mut Tokenizer<Sink>,
+        input: &mut BufferQueue,
+    ) -> Status {
+        let mut unconsume = StrTendril::from_char('#');
+        if let Some(c) = self.hex_marker {
+            unconsume.push_char(c);
+        }
+        input.push_front(unconsume);
+        tokenizer.emit_error(Borrowed("Numeric character reference without digits"));
+        self.finish_none()
+    }
+
+    fn finish_numeric<Sink: TokenSink>(&mut self, tokenizer: &mut Tokenizer<Sink>) -> Status {
+        fn conv(n: u32) -> char { // only called for values the arms below have already vetted
+            from_u32(n).expect("invalid char missed by error handling cases")
+        }
+
+        let (c, error) = match self.num { // resulting character, and whether it is a parse error
+            n if (n > 0x10FFFF) || self.num_too_big => ('\u{fffd}', true), // beyond Unicode
+            0x00 | 0xD800..=0xDFFF => ('\u{fffd}', true), // NUL and surrogates
+
+            0x80..=0x9F => match data::C1_REPLACEMENTS[(self.num - 0x80) as usize] {
+                Some(c) => (c, true), // replaced through the table
+                None => (conv(self.num), true), // no table entry: kept as-is
+            },
+
+            0x01..=0x08 | 0x0B | 0x0D..=0x1F | 0x7F | 0xFDD0..=0xFDEF => (conv(self.num), true),
+
+            n if (n & 0xFFFE) == 0xFFFE => (conv(n), true), // low 16 bits are 0xFFFE or 0xFFFF
+
+            n => (conv(n), false), // anything else is used unchanged, without error
+        };
+
+        if error {
+            let msg = format_if!(
+                tokenizer.opts.exact_errors,
+                "Invalid numeric character reference",
+                "Invalid numeric character reference value 0x{:06X}",
+                self.num // NOTE(review): may have wrapped once `num_too_big` is set
+            );
+            tokenizer.emit_error(msg);
+        }
+
+        self.finish_one(c)
+    }
+
+    fn do_named<Sink: TokenSink>(
+        &mut self,
+        tokenizer: &mut Tokenizer<Sink>,
+        input: &mut BufferQueue,
+    ) -> Status { // consumes one character and extends the name match
+        let c = unwrap_or_return!(tokenizer.get_char(input), Stuck);
+        self.name_buf_mut().push_char(c);
+        match data::NAMED_ENTITIES.get(&self.name_buf()[..]) {
+            // We have either a full match or a prefix of one.
+            Some(&m) => {
+                if m.0 != 0 {
+                    // We have a full match, but there might be a longer one to come.
+                    self.name_match = Some(m);
+                    self.name_len = self.name_buf().len(); // byte length at the time of the match
+                }
+                // Otherwise we just have a prefix match.
+                Progress
+            },
+
+            // Can't continue the match.
+            None => self.finish_named(tokenizer, input, Some(c)),
+        }
+    }
+
+    fn emit_name_error<Sink: TokenSink>(&mut self, tokenizer: &mut Tokenizer<Sink>) {
+        let msg = format_if!( // the second format string also takes the trailing argument
+            tokenizer.opts.exact_errors,
+            "Invalid character reference",
+            "Invalid character reference &{}",
+            self.name_buf()
+        );
+        tokenizer.emit_error(msg);
+    }
+
+    fn unconsume_name(&mut self, input: &mut BufferQueue) { // returns the whole name to the input
+        input.push_front(self.name_buf_opt.take().unwrap()); // buffer is `None` from here on
+    }
+
+    fn finish_named<Sink: TokenSink>(
+        &mut self,
+        tokenizer: &mut Tokenizer<Sink>,
+        input: &mut BufferQueue,
+        end_char: Option<char>, // last character consumed, or `None` when called at end of file
+    ) -> Status {
+        match self.name_match {
+            None => { // no entity name matched at any point
+                match end_char {
+                    Some(c) if is_ascii_alnum(c) => {
+                        // Keep looking for a semicolon, to determine whether
+                        // we emit a parse error.
+                        self.state = BogusName;
+                        return Progress;
+                    },
+
+                    // Check length because &; is not a parse error.
+                    Some(';') if self.name_buf().len() > 1 => self.emit_name_error(tokenizer),
+
+                    _ => (),
+                }
+                self.unconsume_name(input); // everything consumed goes back to the input
+                self.finish_none()
+            },
+
+            Some((c1, c2)) => {
+                // We have a complete match, but we may have consumed
+                // additional characters into self.name_buf.  Usually
+                // at least one, but several in cases like
+                //
+                //     &not    => match for U+00AC
+                //     &noti   => valid prefix for &notin
+                //     &notit  => can't continue match
+
+                let name_len = self.name_len;
+                assert!(name_len > 0);
+                let last_matched = self.name_buf()[name_len - 1..].chars().next().unwrap();
+
+                // There might not be a next character after the match, if
+                // we had a full match and then hit EOF.
+                let next_after = if name_len == self.name_buf().len() {
+                    None
+                } else {
+                    Some(self.name_buf()[name_len..].chars().next().unwrap())
+                };
+
+                // "If the character reference is being consumed as part of an
+                // attribute, and the last character matched is not a U+003B
+                // SEMICOLON character (;), and the next character is either a
+                // U+003D EQUALS SIGN character (=) or an alphanumeric ASCII
+                // character, then, for historical reasons, all the characters
+                // that were matched after the U+0026 AMPERSAND character (&)
+                // must be unconsumed, and nothing is returned. However, if
+                // this next character is in fact a U+003D EQUALS SIGN
+                // character (=), then this is a parse error"
+
+                let unconsume_all = match (self.addnl_allowed, last_matched, next_after) {
+                    (_, ';', _) => false, // properly terminated: always accepted
+                    (Some(_), _, Some('=')) => {
+                        tokenizer.emit_error(Borrowed(
+                            "Equals sign after character reference in attribute",
+                        ));
+                        true
+                    },
+                    (Some(_), _, Some(c)) if is_ascii_alnum(c) => true, // unconsumed without error
+                    _ => {
+                        tokenizer.emit_error(Borrowed(
+                            "Character reference does not end with semicolon",
+                        ));
+                        false
+                    },
+                };
+
+                if unconsume_all {
+                    self.unconsume_name(input);
+                    self.finish_none()
+                } else {
+                    input.push_front(StrTendril::from_slice(&self.name_buf()[name_len..])); // excess
+                    self.result = Some(CharRef {
+                        chars: [from_u32(c1).unwrap(), from_u32(c2).unwrap()],
+                        num_chars: if c2 == 0 { 1 } else { 2 }, // a zero second code point is unused
+                    });
+                    Done
+                }
+            },
+        }
+    }
+
+    fn do_bogus_name<Sink: TokenSink>(
+        &mut self,
+        tokenizer: &mut Tokenizer<Sink>,
+        input: &mut BufferQueue,
+    ) -> Status { // scans past an unmatched name to see whether it ends in `;`
+        let c = unwrap_or_return!(tokenizer.get_char(input), Stuck);
+        self.name_buf_mut().push_char(c);
+        match c {
+            _ if is_ascii_alnum(c) => return Progress, // still inside the bogus name
+            ';' => self.emit_name_error(tokenizer), // only a `;`-terminated name is an error
+            _ => (),
+        }
+        self.unconsume_name(input); // in every finishing case the text goes back to the input
+        self.finish_none()
+    }
+
+    pub fn end_of_file<Sink: TokenSink>(
+        &mut self,
+        tokenizer: &mut Tokenizer<Sink>,
+        input: &mut BufferQueue,
+    ) { // forces the machine to a result when no more input will arrive
+        while self.result.is_none() { // `Named` may move to `BogusName`, hence the loop
+            match self.state {
+                Begin => drop(self.finish_none()),
+
+                Numeric(_) if !self.seen_digit => drop(self.unconsume_numeric(tokenizer, input)),
+
+                Numeric(_) | NumericSemicolon => { // digits were seen, so a value is produced
+                    tokenizer.emit_error(Borrowed("EOF in numeric character reference"));
+                    self.finish_numeric(tokenizer);
+                },
+
+                Named => drop(self.finish_named(tokenizer, input, None)),
+
+                BogusName => {
+                    self.unconsume_name(input);
+                    self.finish_none();
+                },
+
+                Octothorpe => {
+                    input.push_front(StrTendril::from_slice("#")); // put the consumed `#` back
+                    tokenizer.emit_error(Borrowed("EOF after '#' in character reference"));
+                    self.finish_none();
+                },
+            }
+        }
+    }
+}
diff --git a/src/tokenizer/interface.rs b/src/tokenizer/interface.rs
new file mode 100644
index 0000000..22d11be
--- /dev/null
+++ b/src/tokenizer/interface.rs
@@ -0,0 +1,110 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use crate::interface::Attribute;
+use crate::tendril::StrTendril;
+use crate::tokenizer::states;
+use crate::LocalName;
+use std::borrow::Cow;
+
+pub use self::TagKind::{EndTag, StartTag};
+pub use self::Token::{CharacterTokens, CommentToken, DoctypeToken, TagToken};
+pub use self::Token::{EOFToken, NullCharacterToken, ParseError};
+
+/// A `DOCTYPE` token.
+///
+/// Each optional field is `None` until it has been set, so a missing
+/// name or identifier can be told apart from an empty one.
+// FIXME: already exists in Servo DOM
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub struct Doctype {
+    /// The doctype name, if any.
+    pub name: Option<StrTendril>,
+    /// The public identifier, if any.
+    pub public_id: Option<StrTendril>,
+    /// The system identifier, if any.
+    pub system_id: Option<StrTendril>,
+    /// Whether the force-quirks flag is set.
+    pub force_quirks: bool,
+}
+
+impl Doctype {
+    /// Create an empty doctype: no name, no public or system identifier,
+    /// and the force-quirks flag cleared.
+    pub fn new() -> Doctype {
+        Doctype {
+            name: None,
+            public_id: None,
+            system_id: None,
+            force_quirks: false,
+        }
+    }
+}
+
+impl Default for Doctype {
+    /// Same as [`Doctype::new`]; provided so the type works with
+    /// `Default`-based APIs such as `..Default::default()`.
+    fn default() -> Doctype {
+        Doctype::new()
+    }
+}
+
+/// Whether a tag token is a start tag or an end tag.
+#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
+pub enum TagKind {
+    /// A start tag.
+    StartTag,
+    /// An end tag.
+    EndTag,
+}
+
+/// A tag token.
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub struct Tag {
+    /// Start tag or end tag.
+    pub kind: TagKind,
+    /// The tag name.
+    pub name: LocalName,
+    /// Whether the self-closing flag was set on the tag.
+    pub self_closing: bool,
+    /// The tag's attributes, in the order they were added.
+    pub attrs: Vec<Attribute>,
+}
+
+impl Tag {
+    /// Compare two tags while disregarding attribute order.
+    ///
+    /// Kind and name must match exactly; the attribute lists are compared
+    /// after sorting copies of them. The self-closing flag is not compared.
+    pub fn equiv_modulo_attr_order(&self, other: &Tag) -> bool {
+        // Sorted copy of a tag's attributes; the tags themselves stay untouched.
+        let sorted_attrs = |tag: &Tag| {
+            let mut attrs = tag.attrs.clone();
+            attrs.sort();
+            attrs
+        };
+
+        // `&&` short-circuits, so nothing is cloned when kind or name differ.
+        self.kind == other.kind
+            && self.name == other.name
+            && sorted_attrs(self) == sorted_attrs(other)
+    }
+}
+
+/// A token produced by the tokenizer and handed to a `TokenSink`.
+#[derive(PartialEq, Eq, Debug)]
+pub enum Token {
+    /// A `DOCTYPE` token.
+    DoctypeToken(Doctype),
+    /// A start or end tag.
+    TagToken(Tag),
+    /// A comment, carrying its text.
+    CommentToken(StrTendril),
+    /// A run of one or more characters.
+    CharacterTokens(StrTendril),
+    /// A single U+0000 NULL character, reported separately from
+    /// `CharacterTokens`.
+    NullCharacterToken,
+    /// End of input.
+    EOFToken,
+    /// A parse error, carrying a human-readable message.
+    ParseError(Cow<'static, str>),
+}
+
+/// What the sink asks the tokenizer to do after it processed a token.
+#[derive(Debug, PartialEq)]
+#[must_use]
+pub enum TokenSinkResult<Handle> {
+    /// Keep tokenizing normally.
+    Continue,
+    /// Pause tokenizing and hand `Handle` back to the tokenizer's caller.
+    Script(Handle),
+    /// Switch the tokenizer to the plaintext state.
+    Plaintext,
+    /// Switch the tokenizer to the raw data state of the given kind.
+    RawData(states::RawKind),
+}
+
+/// Types which can receive tokens from the tokenizer.
+pub trait TokenSink {
+    /// Handle type carried by `TokenSinkResult::Script`.
+    type Handle;
+
+    /// Process a token.
+    ///
+    /// `line_number` is the line the tokenizer is on when it emits the token.
+    fn process_token(&mut self, token: Token, line_number: u64) -> TokenSinkResult<Self::Handle>;
+
+    /// Signal sink that tokenization reached the end.
+    ///
+    /// The default implementation does nothing.
+    fn end(&mut self) {}
+
+    /// Used in the markup declaration open state. By default, this always
+    /// returns false and thus all CDATA sections are tokenized as bogus
+    /// comments.
+    /// <https://html.spec.whatwg.org/multipage/#markup-declaration-open-state>
+    fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool {
+        false
+    }
+}
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
new file mode 100644
index 0000000..267fdf3
--- /dev/null
+++ b/src/tokenizer/mod.rs
@@ -0,0 +1,1713 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! The HTML5 tokenizer.
+
+pub use self::interface::{CharacterTokens, EOFToken, NullCharacterToken, ParseError};
+pub use self::interface::{CommentToken, DoctypeToken, TagToken, Token};
+pub use self::interface::{Doctype, EndTag, StartTag, Tag, TagKind};
+pub use self::interface::{TokenSink, TokenSinkResult};
+
+use self::states::{DoctypeIdKind, Public, System};
+use self::states::{DoubleEscaped, Escaped};
+use self::states::{DoubleQuoted, SingleQuoted, Unquoted};
+use self::states::{Rawtext, Rcdata, ScriptData, ScriptDataEscaped};
+
+use self::char_ref::{CharRef, CharRefTokenizer};
+
+use crate::util::str::lower_ascii_letter;
+
+use log::debug;
+use mac::{_tt_as_expr_hack, format_if, matches};
+use markup5ever::{namespace_url, ns, small_char_set};
+use std::borrow::Cow::{self, Borrowed};
+use std::collections::BTreeMap;
+use std::default::Default;
+use std::mem::replace;
+
+pub use crate::buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult};
+use crate::tendril::StrTendril;
+use crate::{Attribute, LocalName, QualName, SmallCharSet};
+
+mod char_ref;
+mod interface;
+pub mod states;
+
+/// Outcome of a single step of the tokenizer state machine, as consumed by
+/// `Tokenizer::run`.
+pub enum ProcessResult<Handle> {
+    /// Keep stepping.
+    Continue,
+    /// Stop stepping; `run` then returns `TokenizerResult::Done`.
+    Suspend,
+    /// Stop stepping and return the handle as `TokenizerResult::Script`.
+    Script(Handle),
+}
+
+/// Result of feeding input to the tokenizer (see `Tokenizer::feed`).
+#[must_use]
+pub enum TokenizerResult<Handle> {
+    /// The tokenizer went as far as it could with the input it was given.
+    Done,
+    /// The sink answered a tag token with `TokenSinkResult::Script`; the
+    /// handle is passed on to the caller.
+    Script(Handle),
+}
+
+/// Append `c` to the string in `opt_str`, creating a one-character string
+/// first if the option is still `None`.
+fn option_push(opt_str: &mut Option<StrTendril>, c: char) {
+    if let Some(existing) = opt_str.as_mut() {
+        existing.push_char(c);
+    } else {
+        *opt_str = Some(StrTendril::from_char(c));
+    }
+}
+
+/// Tokenizer options, with an impl for `Default`.
+///
+/// Start from `TokenizerOpts::default()` and override individual fields.
+#[derive(Clone)]
+pub struct TokenizerOpts {
+    /// Report all parse errors described in the spec, at some
+    /// performance penalty?  Default: false
+    pub exact_errors: bool,
+
+    /// Discard a `U+FEFF BYTE ORDER MARK` if we see one at the beginning
+    /// of the stream?  Default: true
+    pub discard_bom: bool,
+
+    /// Keep a record of how long we spent in each state?  Printed
+    /// when `end()` is called.  Default: false
+    pub profile: bool,
+
+    /// Initial state override.  Only the test runner should use
+    /// a non-`None` value!  When `None`, the tokenizer starts in the
+    /// data state.
+    pub initial_state: Option<states::State>,
+
+    /// Last start tag.  Only the test runner should use a
+    /// non-`None` value!  `Tokenizer::new` takes the value out of the
+    /// options and converts it to a `LocalName`.
+    ///
+    /// FIXME: Can't use Tendril because we want TokenizerOpts
+    /// to be Send.
+    pub last_start_tag_name: Option<String>,
+}
+
+impl Default for TokenizerOpts {
+    /// Defaults: BOM discarding on, exact errors and profiling off, and no
+    /// test-runner overrides.
+    fn default() -> TokenizerOpts {
+        TokenizerOpts {
+            // The one option that is on by default.
+            discard_bom: true,
+
+            // Opt-in diagnostics.
+            exact_errors: false,
+            profile: false,
+
+            // Test-runner-only overrides.
+            initial_state: None,
+            last_start_tag_name: None,
+        }
+    }
+}
+
+/// The HTML tokenizer.
+///
+/// Holds the state machine's current state together with the tag,
+/// attribute, comment and doctype under construction; finished tokens
+/// are handed to `sink`.
+pub struct Tokenizer<Sink> {
+    /// Options controlling the behavior of the tokenizer.
+    opts: TokenizerOpts,
+
+    /// Destination for tokens we emit.
+    pub sink: Sink,
+
+    /// The abstract machine state as described in the spec.
+    state: states::State,
+
+    /// Are we at the end of the file, once buffers have been processed
+    /// completely? This affects whether we will wait for lookahead or not.
+    at_eof: bool,
+
+    /// Tokenizer for character references, if we're tokenizing
+    /// one at the moment.
+    char_ref_tokenizer: Option<Box<CharRefTokenizer>>,
+
+    /// Current input character.  Just consumed, may reconsume.
+    current_char: char,
+
+    /// Should we reconsume the current input character?
+    reconsume: bool,
+
+    /// Did we just consume \r, translating it to \n?  In that case we need
+    /// to ignore the next character if it's \n.
+    ignore_lf: bool,
+
+    /// Discard a U+FEFF BYTE ORDER MARK if we see one?  Only done at the
+    /// beginning of the stream.  Initialized from `opts.discard_bom`.
+    discard_bom: bool,
+
+    /// Current tag kind.
+    current_tag_kind: TagKind,
+
+    /// Current tag name.
+    current_tag_name: StrTendril,
+
+    /// Current tag is self-closing?
+    current_tag_self_closing: bool,
+
+    /// Current tag attributes.
+    current_tag_attrs: Vec<Attribute>,
+
+    /// Current attribute name.  Empty when no attribute is in progress.
+    current_attr_name: StrTendril,
+
+    /// Current attribute value.
+    current_attr_value: StrTendril,
+
+    /// Current comment.
+    current_comment: StrTendril,
+
+    /// Current doctype token.
+    current_doctype: Doctype,
+
+    /// Last start tag name, for use in checking "appropriate end tag".
+    last_start_tag_name: Option<LocalName>,
+
+    /// The "temporary buffer" mentioned in the spec.
+    temp_buf: StrTendril,
+
+    /// Record of how many ns we spent in each state, if profiling is enabled.
+    state_profile: BTreeMap<states::State, u64>,
+
+    /// Record of how many ns we spent in the token sink.
+    time_in_sink: u64,
+
+    /// Current line number.  Starts at 1 and is incremented for every
+    /// newline seen by input preprocessing; passed to the sink with each token.
+    current_line: u64,
+}
+
+impl<Sink: TokenSink> Tokenizer<Sink> {
+    /// Create a new tokenizer which feeds tokens to a particular `TokenSink`.
+    ///
+    /// The initial state and last start tag name come from `opts` when set;
+    /// otherwise the tokenizer starts in the data state with no last start
+    /// tag. Line counting starts at 1.
+    pub fn new(sink: Sink, mut opts: TokenizerOpts) -> Tokenizer<Sink> {
+        let initial_state = opts.initial_state.unwrap_or(states::Data);
+        let last_start_tag_name = opts
+            .last_start_tag_name
+            .take()
+            .map(|name| LocalName::from(name.as_str()));
+
+        Tokenizer {
+            // Values derived from the options (read before `opts` is moved).
+            state: initial_state,
+            discard_bom: opts.discard_bom,
+            last_start_tag_name,
+            opts,
+            sink,
+
+            // Input handling.
+            at_eof: false,
+            char_ref_tokenizer: None,
+            current_char: '\0',
+            reconsume: false,
+            ignore_lf: false,
+            current_line: 1,
+
+            // Tokens under construction.
+            current_tag_kind: StartTag,
+            current_tag_name: StrTendril::new(),
+            current_tag_self_closing: false,
+            current_tag_attrs: Vec::new(),
+            current_attr_name: StrTendril::new(),
+            current_attr_value: StrTendril::new(),
+            current_comment: StrTendril::new(),
+            current_doctype: Doctype::new(),
+            temp_buf: StrTendril::new(),
+
+            // Profiling.
+            state_profile: BTreeMap::new(),
+            time_in_sink: 0,
+        }
+    }
+
+    /// Feed an input string into the tokenizer.
+    ///
+    /// Runs the state machine over `input` for as long as it can make
+    /// progress. Returns `TokenizerResult::Script` when the sink asked for a
+    /// script to be handled, and `TokenizerResult::Done` otherwise
+    /// (including when `input` is empty).
+    ///
+    /// If BOM discarding is enabled, a leading U+FEFF is dropped, but only
+    /// from the very first character the tokenizer ever sees.
+    pub fn feed(&mut self, input: &mut BufferQueue) -> TokenizerResult<Sink::Handle> {
+        if input.is_empty() {
+            return TokenizerResult::Done;
+        }
+
+        if self.discard_bom {
+            match input.peek() {
+                Some(c) => {
+                    // We have now seen the start of the stream. Clear the
+                    // flag so that a U+FEFF at the start of a later `feed`
+                    // call (a new chunk, or the rest of the input after a
+                    // script) is tokenized like any other character.
+                    self.discard_bom = false;
+                    if c == '\u{feff}' {
+                        input.next();
+                    }
+                },
+                // Nothing to look at yet; keep waiting for the first character.
+                None => return TokenizerResult::Done,
+            }
+        }
+
+        self.run(input)
+    }
+
+    /// Force the tokenizer into the plaintext state.
+    pub fn set_plaintext_state(&mut self) {
+        self.state = states::Plaintext;
+    }
+
+    /// Hand `token` and the current line number to the sink and return the
+    /// sink's answer. When profiling is enabled, the time spent inside the
+    /// sink is added to `time_in_sink`.
+    fn process_token(&mut self, token: Token) -> TokenSinkResult<Sink::Handle> {
+        if !self.opts.profile {
+            return self.sink.process_token(token, self.current_line);
+        }
+
+        let (answer, elapsed) = time!(self.sink.process_token(token, self.current_line));
+        self.time_in_sink += elapsed;
+        answer
+    }
+
+    /// Send `token` to the sink for token kinds that must not change the
+    /// tokenizer's course.
+    ///
+    /// Panics if the sink answers with anything other than
+    /// `TokenSinkResult::Continue`.
+    fn process_token_and_continue(&mut self, token: Token) {
+        assert!(matches!(
+            self.process_token(token),
+            TokenSinkResult::Continue
+        ));
+    }
+
+    //§ preprocessing-the-input-stream
+    // Turn 'c', a character we already consumed from the buffers, into the
+    // next input character: CR and CR LF both become a single LF, the line
+    // counter is advanced, and (with exact_errors) bad characters are
+    // reported.  Returns None if an LF had to be skipped and nothing follows.
+    fn get_preprocessed_char(&mut self, mut c: char, input: &mut BufferQueue) -> Option<char> {
+        // The previous character was a CR already reported as LF, so an LF
+        // directly after it belongs to the same line break and is skipped.
+        // The flag is cleared either way.
+        if replace(&mut self.ignore_lf, false) && c == '\n' {
+            c = unwrap_or_return!(input.next(), None);
+        }
+
+        match c {
+            '\r' => {
+                self.ignore_lf = true;
+                c = '\n';
+                self.current_line += 1;
+            },
+            '\n' => self.current_line += 1,
+            _ => (),
+        }
+
+        if self.opts.exact_errors {
+            let code = c as u32;
+            let is_bad = match code {
+                0x01..=0x08 | 0x0B | 0x0E..=0x1F | 0x7F..=0x9F | 0xFDD0..=0xFDEF => true,
+                // Code points whose low 16 bits are FFFE or FFFF.
+                _ => (code & 0xFFFE) == 0xFFFE,
+            };
+            if is_bad {
+                self.emit_error(Cow::Owned(format!("Bad character {}", c)));
+            }
+        }
+
+        debug!("got character {}", c);
+        self.current_char = c;
+        Some(c)
+    }
+
+    //§ tokenization
+    // Get the next input character, if one is available.  A pending
+    // reconsume request is served from the current character instead of
+    // the buffers.
+    fn get_char(&mut self, input: &mut BufferQueue) -> Option<char> {
+        if self.reconsume {
+            self.reconsume = false;
+            return Some(self.current_char);
+        }
+
+        let raw = input.next()?;
+        self.get_preprocessed_char(raw, input)
+    }
+
+    /// Pop either a single character from `set` or a run of characters
+    /// that are not in `set`, whichever comes first in `input`.
+    ///
+    /// Returns `None` when no input is available.
+    fn pop_except_from(&mut self, input: &mut BufferQueue, set: SmallCharSet) -> Option<SetResult> {
+        // Bail to the slow path for various corner cases.
+        // This means that `FromSet` can contain characters not in the set!
+        // It shouldn't matter because the fallback `FromSet` case should
+        // always do the same thing as the `NotFromSet` case.
+        let needs_slow_path = self.opts.exact_errors || self.reconsume || self.ignore_lf;
+        if needs_slow_path {
+            return self.get_char(input).map(FromSet);
+        }
+
+        let popped = input.pop_except_from(set);
+        debug!("got characters {:?}", popped);
+        match popped {
+            // Single characters still need input-stream preprocessing.
+            Some(FromSet(c)) => self.get_preprocessed_char(c, input).map(FromSet),
+
+            // NB: We don't set self.current_char for a run of characters not
+            // in the set.  It shouldn't matter for the codepaths that use
+            // this.
+            other => other,
+        }
+    }
+
+    // Check if the next characters match `pat` under the comparison `eq`
+    // (the callers in this file pass an ASCII case-insensitive or an exact
+    // byte comparison).  See BufferQueue::eat.
+    //
+    // Returns None when more input is needed to decide; whatever was
+    // available is then parked in the temporary buffer and put back in
+    // front of the input on the next call.  At EOF an undecided match
+    // counts as Some(false).
+    //
+    // NB: this doesn't do input stream preprocessing or set the current input
+    // character.
+    fn eat(
+        &mut self,
+        input: &mut BufferQueue,
+        pat: &str,
+        eq: fn(&u8, &u8) -> bool,
+    ) -> Option<bool> {
+        // Restore anything parked by an earlier, undecided attempt.
+        let parked = replace(&mut self.temp_buf, StrTendril::new());
+        input.push_front(parked);
+
+        let outcome = input.eat(pat, eq);
+        if outcome.is_some() {
+            return outcome;
+        }
+        if self.at_eof {
+            return Some(false);
+        }
+
+        // Undecided: drain the input into the temporary buffer and wait.
+        while let Some(c) = input.next() {
+            self.temp_buf.push_char(c);
+        }
+        None
+    }
+
+    /// Run the state machine for as long as we can.
+    ///
+    /// Steps until a step reports `Suspend` (returning `Done`) or `Script`
+    /// (returning the script handle). With profiling enabled, the time of
+    /// each step, minus the time spent in the sink during that step, is
+    /// added to the entry of the state the step started in.
+    fn run(&mut self, input: &mut BufferQueue) -> TokenizerResult<Sink::Handle> {
+        // Read once up front, as the two separate loops it replaces did.
+        let profiling = self.opts.profile;
+
+        loop {
+            let outcome = if profiling {
+                let state_before = self.state;
+                let sink_before = self.time_in_sink;
+                let (outcome, elapsed) = time!(self.step(input));
+                let own_time = elapsed - (self.time_in_sink - sink_before);
+                *self.state_profile.entry(state_before).or_insert(0) += own_time;
+                outcome
+            } else {
+                self.step(input)
+            };
+
+            match outcome {
+                ProcessResult::Continue => (),
+                ProcessResult::Suspend => return TokenizerResult::Done,
+                ProcessResult::Script(node) => return TokenizerResult::Script(node),
+            }
+        }
+    }
+
+    /// Emit a parse error for an unexpected character.
+    ///
+    /// NOTE(review): `format_if!` comes from the `mac` crate; presumably the
+    /// formatted message naming the character and state is built only when
+    /// `exact_errors` is set, and the fixed "Bad character" text is used
+    /// otherwise. Confirm against the macro's documentation.
+    fn bad_char_error(&mut self) {
+        let msg = format_if!(
+            self.opts.exact_errors,
+            "Bad character",
+            "Saw {} in state {:?}",
+            self.current_char,
+            self.state
+        );
+        self.emit_error(msg);
+    }
+
+    /// Emit a parse error for an unexpected end of input.
+    ///
+    /// NOTE(review): `format_if!` comes from the `mac` crate; presumably the
+    /// formatted message naming the state is built only when `exact_errors`
+    /// is set, and the fixed "Unexpected EOF" text is used otherwise.
+    /// Confirm against the macro's documentation.
+    fn bad_eof_error(&mut self) {
+        let msg = format_if!(
+            self.opts.exact_errors,
+            "Unexpected EOF",
+            "Saw EOF in state {:?}",
+            self.state
+        );
+        self.emit_error(msg);
+    }
+
+    /// Emit a single character. U+0000 is sent as `NullCharacterToken`,
+    /// everything else as a one-character `CharacterTokens`.
+    fn emit_char(&mut self, c: char) {
+        let token = if c == '\0' {
+            NullCharacterToken
+        } else {
+            CharacterTokens(StrTendril::from_char(c))
+        };
+        self.process_token_and_continue(token);
+    }
+
+    /// Emit a run of characters as a single `CharacterTokens` token.
+    // The string must not contain '\0'!
+    fn emit_chars(&mut self, b: StrTendril) {
+        self.process_token_and_continue(CharacterTokens(b));
+    }
+
+    /// Finish the tag under construction and send it to the sink.
+    ///
+    /// A start tag is remembered as the last start tag; an end tag with
+    /// attributes or the self-closing flag is reported as a parse error.
+    /// The sink's answer may override the tokenizer state:
+    /// `Plaintext` and `RawData` switch to the matching state, `Script`
+    /// switches to the data state and is passed on to the caller.
+    fn emit_current_tag(&mut self) -> ProcessResult<Sink::Handle> {
+        self.finish_attribute();
+
+        let name = LocalName::from(&*self.current_tag_name);
+        self.current_tag_name.clear();
+
+        let kind = self.current_tag_kind;
+        match kind {
+            StartTag => self.last_start_tag_name = Some(name.clone()),
+            EndTag => {
+                if !self.current_tag_attrs.is_empty() {
+                    self.emit_error(Borrowed("Attributes on an end tag"));
+                }
+                if self.current_tag_self_closing {
+                    self.emit_error(Borrowed("Self-closing end tag"));
+                }
+            },
+        }
+
+        let tag = Tag {
+            kind,
+            name,
+            self_closing: self.current_tag_self_closing,
+            attrs: replace(&mut self.current_tag_attrs, Vec::new()),
+        };
+
+        // Work out the state override (if any) and what to report to `run`.
+        let (next_state, outcome) = match self.process_token(TagToken(tag)) {
+            TokenSinkResult::Continue => return ProcessResult::Continue,
+            TokenSinkResult::Plaintext => (states::Plaintext, ProcessResult::Continue),
+            TokenSinkResult::Script(node) => (states::Data, ProcessResult::Script(node)),
+            TokenSinkResult::RawData(kind) => (states::RawData(kind), ProcessResult::Continue),
+        };
+        self.state = next_state;
+        outcome
+    }
+
+    /// Emit the contents of the temporary buffer as character tokens,
+    /// leaving the buffer empty.
+    fn emit_temp_buf(&mut self) {
+        // FIXME: Make sure that clearing on emit is spec-compatible.
+        let buf = replace(&mut self.temp_buf, StrTendril::new());
+        self.emit_chars(buf);
+    }
+
+    /// Empty the temporary buffer.
+    fn clear_temp_buf(&mut self) {
+        // Do this without a new allocation.
+        self.temp_buf.clear();
+    }
+
+    /// Emit the comment under construction as a `CommentToken`, leaving an
+    /// empty comment behind.
+    fn emit_current_comment(&mut self) {
+        let comment = replace(&mut self.current_comment, StrTendril::new());
+        self.process_token_and_continue(CommentToken(comment));
+    }
+
+    /// Throw away the tag under construction: its name, self-closing flag
+    /// and attributes. The tag kind is left as it is.
+    fn discard_tag(&mut self) {
+        self.current_tag_attrs = Vec::new();
+        self.current_tag_self_closing = false;
+        self.current_tag_name.clear();
+    }
+
+    /// Start a new tag of the given kind whose name begins with `c`,
+    /// discarding whatever tag was under construction before.
+    fn create_tag(&mut self, kind: TagKind, c: char) {
+        self.discard_tag();
+        self.current_tag_name.push_char(c);
+        self.current_tag_kind = kind;
+    }
+
+    /// Is the tag under construction an end tag whose name equals the name
+    /// of the last start tag? Always false when there is no last start tag.
+    fn have_appropriate_end_tag(&self) -> bool {
+        self.current_tag_kind == EndTag
+            && self
+                .last_start_tag_name
+                .as_ref()
+                .map_or(false, |last| *self.current_tag_name == **last)
+    }
+
+    /// Finish the attribute in progress (if any) and start a new one whose
+    /// name begins with `c`.
+    fn create_attribute(&mut self, c: char) {
+        self.finish_attribute();
+
+        self.current_attr_name.push_char(c);
+    }
+
+    /// Move the attribute in progress onto the current tag.
+    ///
+    /// Does nothing when no attribute is in progress (empty name). If the
+    /// tag already has an attribute with the same name, the new one is
+    /// dropped and a parse error is emitted. Either way the name and value
+    /// buffers end up empty.
+    fn finish_attribute(&mut self) {
+        if self.current_attr_name.is_empty() {
+            return;
+        }
+
+        // Check for a duplicate attribute.
+        // FIXME: the spec says we should error as soon as the name is finished.
+        // FIXME: linear time search, do we care?
+        let is_duplicate = self
+            .current_tag_attrs
+            .iter()
+            .any(|attr| *attr.name.local == *self.current_attr_name);
+
+        if is_duplicate {
+            self.emit_error(Borrowed("Duplicate attribute"));
+            self.current_attr_name.clear();
+            self.current_attr_value.clear();
+            return;
+        }
+
+        let local = LocalName::from(&*self.current_attr_name);
+        self.current_attr_name.clear();
+        let value = replace(&mut self.current_attr_value, StrTendril::new());
+
+        // The tree builder will adjust the namespace if necessary.
+        // This only happens in foreign elements.
+        let name = QualName::new(None, ns!(), local);
+        self.current_tag_attrs.push(Attribute { name, value });
+    }
+
+    /// Emit the doctype under construction as a `DoctypeToken`, leaving a
+    /// fresh empty doctype behind.
+    fn emit_current_doctype(&mut self) {
+        let doctype = replace(&mut self.current_doctype, Doctype::new());
+        self.process_token_and_continue(DoctypeToken(doctype));
+    }
+
+    /// Mutable access to the current doctype's public or system identifier,
+    /// selected by `kind`.
+    fn doctype_id(&mut self, kind: DoctypeIdKind) -> &mut Option<StrTendril> {
+        match kind {
+            Public => &mut self.current_doctype.public_id,
+            System => &mut self.current_doctype.system_id,
+        }
+    }
+
+    /// Set the selected doctype identifier to the empty string (as opposed
+    /// to missing), reusing the existing string if there is one.
+    fn clear_doctype_id(&mut self, kind: DoctypeIdKind) {
+        let slot = self.doctype_id(kind);
+        if let Some(existing) = slot.as_mut() {
+            existing.clear();
+        } else {
+            *slot = Some(StrTendril::new());
+        }
+    }
+
+    /// Start tokenizing a character reference by installing a fresh
+    /// `CharRefTokenizer`; `step` hands control to it while it is present.
+    fn consume_char_ref(&mut self, addnl_allowed: Option<char>) {
+        // NB: The char ref tokenizer assumes we have an additional allowed
+        // character iff we're tokenizing in an attribute value.
+        self.char_ref_tokenizer = Some(Box::new(CharRefTokenizer::new(addnl_allowed)));
+    }
+
+    /// Emit the end-of-file token.
+    fn emit_eof(&mut self) {
+        self.process_token_and_continue(EOFToken);
+    }
+
+    /// Look at the next input character without consuming it. While a
+    /// reconsume is pending that is the current character; otherwise it is
+    /// the raw (not preprocessed) next character of `input`.
+    fn peek(&mut self, input: &BufferQueue) -> Option<char> {
+        if !self.reconsume {
+            return input.peek();
+        }
+        Some(self.current_char)
+    }
+
+    /// Consume the next input character and ignore it. Nothing happens
+    /// beyond `get_char`'s own effects if no character is available.
+    fn discard_char(&mut self, input: &mut BufferQueue) {
+        self.get_char(input);
+    }
+
+    /// Report a parse error to the sink as a `ParseError` token.
+    fn emit_error(&mut self, error: Cow<'static, str>) {
+        self.process_token_and_continue(ParseError(error));
+    }
+}
+//§ END
+
+// Shorthand for common state machine behaviors.
+//
+// Each arm maps one command of the form `<tokenizer> : <verb> <args...>`
+// to a single statement on the tokenizer.  Commands are normally issued
+// through `go!`, which sequences them and adds the state transitions.
+macro_rules! shorthand (
+    ( $me:ident : emit $c:expr                     ) => ( $me.emit_char($c);                                   );
+    ( $me:ident : create_tag $kind:ident $c:expr   ) => ( $me.create_tag($kind, $c);                           );
+    ( $me:ident : push_tag $c:expr                 ) => ( $me.current_tag_name.push_char($c);                  );
+    ( $me:ident : discard_tag                      ) => ( $me.discard_tag();                                   );
+    ( $me:ident : discard_char $input:expr         ) => ( $me.discard_char($input);                            );
+    ( $me:ident : push_temp $c:expr                ) => ( $me.temp_buf.push_char($c);                          );
+    ( $me:ident : emit_temp                        ) => ( $me.emit_temp_buf();                                 );
+    ( $me:ident : clear_temp                       ) => ( $me.clear_temp_buf();                                );
+    ( $me:ident : create_attr $c:expr              ) => ( $me.create_attribute($c);                            );
+    ( $me:ident : push_name $c:expr                ) => ( $me.current_attr_name.push_char($c);                 );
+    ( $me:ident : push_value $c:expr               ) => ( $me.current_attr_value.push_char($c);                );
+    ( $me:ident : append_value $c:expr             ) => ( $me.current_attr_value.push_tendril($c);             );
+    ( $me:ident : push_comment $c:expr             ) => ( $me.current_comment.push_char($c);                   );
+    ( $me:ident : append_comment $c:expr           ) => ( $me.current_comment.push_slice($c);                  );
+    ( $me:ident : emit_comment                     ) => ( $me.emit_current_comment();                          );
+    ( $me:ident : clear_comment                    ) => ( $me.current_comment.clear();                         );
+    ( $me:ident : create_doctype                   ) => ( $me.current_doctype = Doctype::new();                );
+    ( $me:ident : push_doctype_name $c:expr        ) => ( option_push(&mut $me.current_doctype.name, $c);      );
+    ( $me:ident : push_doctype_id $k:ident $c:expr ) => ( option_push($me.doctype_id($k), $c);                 );
+    ( $me:ident : clear_doctype_id $k:ident        ) => ( $me.clear_doctype_id($k);                            );
+    ( $me:ident : force_quirks                     ) => ( $me.current_doctype.force_quirks = true;             );
+    ( $me:ident : emit_doctype                     ) => ( $me.emit_current_doctype();                          );
+    ( $me:ident : error                            ) => ( $me.bad_char_error();                                );
+    ( $me:ident : error_eof                        ) => ( $me.bad_eof_error();                                 );
+);
+
+// Tracing of tokenizer actions.  This adds significant bloat and compile time,
+// so it's behind a cfg flag.
+//
+// Logs the stringified command at debug level, then runs it through
+// `shorthand!` exactly like the non-tracing variant does.
+#[cfg(trace_tokenizer)]
+macro_rules! sh_trace ( ( $me:ident : $($cmds:tt)* ) => ({
+    debug!("  {}", stringify!($($cmds)*));
+    shorthand!($me: $($cmds)*);
+}));
+
+// Without tracing, a command is simply forwarded to `shorthand!`.
+#[cfg(not(trace_tokenizer))]
+macro_rules! sh_trace ( ( $me:ident : $($cmds:tt)* ) => ( shorthand!($me: $($cmds)*) ) );
+
+// A little DSL for sequencing shorthand actions.
+//
+// Usage: `go!(self: cmd1; cmd2; ...; last)`.  Commands separated by `;`
+// run in order.  The terminal commands below (`to`, `reconsume`,
+// `consume_char_ref`, `emit_tag`, `eof`) return from the enclosing
+// function, so they are only useful in last position.
+macro_rules! go (
+    // A pattern like $($cmd:tt)* ; $($rest:tt)* causes parse ambiguity.
+    // We have to tell the parser how much lookahead we need.
+    // Hence one arm per command length, from one to four tokens.
+
+    ( $me:ident : $a:tt                   ; $($rest:tt)* ) => ({ sh_trace!($me: $a);          go!($me: $($rest)*); });
+    ( $me:ident : $a:tt $b:tt             ; $($rest:tt)* ) => ({ sh_trace!($me: $a $b);       go!($me: $($rest)*); });
+    ( $me:ident : $a:tt $b:tt $c:tt       ; $($rest:tt)* ) => ({ sh_trace!($me: $a $b $c);    go!($me: $($rest)*); });
+    ( $me:ident : $a:tt $b:tt $c:tt $d:tt ; $($rest:tt)* ) => ({ sh_trace!($me: $a $b $c $d); go!($me: $($rest)*); });
+
+    // These can only come at the end.
+
+    // Switch to a state (optionally parameterized) and keep running.
+    ( $me:ident : to $s:ident                    ) => ({ $me.state = states::$s; return ProcessResult::Continue;           });
+    ( $me:ident : to $s:ident $k1:expr           ) => ({ $me.state = states::$s($k1); return ProcessResult::Continue;      });
+    ( $me:ident : to $s:ident $k1:ident $k2:expr ) => ({ $me.state = states::$s($k1($k2)); return ProcessResult::Continue; });
+
+    // Like `to`, but the current character is processed again in the new state.
+    ( $me:ident : reconsume $s:ident                    ) => ({ $me.reconsume = true; go!($me: to $s);         });
+    ( $me:ident : reconsume $s:ident $k1:expr           ) => ({ $me.reconsume = true; go!($me: to $s $k1);     });
+    ( $me:ident : reconsume $s:ident $k1:ident $k2:expr ) => ({ $me.reconsume = true; go!($me: to $s $k1 $k2); });
+
+    // Start a character reference, optionally with an additional allowed character.
+    ( $me:ident : consume_char_ref             ) => ({ $me.consume_char_ref(None); return ProcessResult::Continue;         });
+    ( $me:ident : consume_char_ref $addnl:expr ) => ({ $me.consume_char_ref(Some($addnl)); return ProcessResult::Continue; });
+
+    // We have a default next state after emitting a tag, but the sink can override.
+    ( $me:ident : emit_tag $s:ident ) => ({
+        $me.state = states::$s;
+        return $me.emit_current_tag();
+    });
+
+    // Emit the EOF token and stop the state machine.
+    ( $me:ident : eof ) => ({ $me.emit_eof(); return ProcessResult::Suspend; });
+
+    // If nothing else matched, it's a single command
+    ( $me:ident : $($cmd:tt)+ ) => ( sh_trace!($me: $($cmd)+); );
+
+    // or nothing.
+    ( $me:ident : ) => (());
+);
+
+// Run a `go!` command sequence only if `$x` matches one of the given
+// patterns; do nothing otherwise.
+macro_rules! go_match ( ( $me:ident : $x:expr, $($pats:pat),+ => $($cmds:tt)* ) => (
+    match $x {
+        $($pats)|+ => go!($me: $($cmds)*),
+        _ => (),
+    }
+));
+
+// Get the next input character, or suspend the state machine if none is
+// available.
+//
+// This is a macro because it can cause early return
+// from the function where it is used.
+macro_rules! get_char ( ($me:expr, $input:expr) => (
+    unwrap_or_return!($me.get_char($input), ProcessResult::Suspend)
+));
+
+// Peek at the next input character, or return `ProcessResult::Suspend`
+// from the enclosing function if none is available.
+macro_rules! peek ( ($me:expr, $input:expr) => (
+    unwrap_or_return!($me.peek($input), ProcessResult::Suspend)
+));
+
+// Call `Tokenizer::pop_except_from` with the given set, or return
+// `ProcessResult::Suspend` from the enclosing function if no input is
+// available.
+macro_rules! pop_except_from ( ($me:expr, $input:expr, $set:expr) => (
+    unwrap_or_return!($me.pop_except_from($input, $set), ProcessResult::Suspend)
+));
+
+// Try to consume `$pat` ASCII case-insensitively, evaluating to whether it
+// matched.  Returns `ProcessResult::Suspend` from the enclosing function
+// while there is too little input to decide.
+macro_rules! eat ( ($me:expr, $input:expr, $pat:expr) => (
+    unwrap_or_return!($me.eat($input, $pat, u8::eq_ignore_ascii_case), ProcessResult::Suspend)
+));
+
+// Like `eat!`, but the pattern must match byte for byte (case-sensitive).
+macro_rules! eat_exact ( ($me:expr, $input:expr, $pat:expr) => (
+    unwrap_or_return!($me.eat($input, $pat, u8::eq), ProcessResult::Suspend)
+));
+
+impl<Sink: TokenSink> Tokenizer<Sink> {
+    // Run the state machine for a while.
+    // Return `ProcessResult::Continue` if we should be immediately re-invoked
+    // (this just simplifies control flow vs. break / continue).
+    #[allow(clippy::never_loop)]
+    fn step(&mut self, input: &mut BufferQueue) -> ProcessResult<Sink::Handle> {
+        if self.char_ref_tokenizer.is_some() {
+            return self.step_char_ref_tokenizer(input);
+        }
+
+        debug!("processing in state {:?}", self.state);
+        match self.state {
+            //§ data-state
+            states::Data => loop {
+                match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n')) {
+                    FromSet('\0') => go!(self: error; emit '\0'),
+                    FromSet('&') => go!(self: consume_char_ref),
+                    FromSet('<') => go!(self: to TagOpen),
+                    FromSet(c) => go!(self: emit c),
+                    NotFromSet(b) => self.emit_chars(b),
+                }
+            },
+
+            //§ rcdata-state
+            states::RawData(Rcdata) => loop {
+                match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n')) {
+                    FromSet('\0') => go!(self: error; emit '\u{fffd}'),
+                    FromSet('&') => go!(self: consume_char_ref),
+                    FromSet('<') => go!(self: to RawLessThanSign Rcdata),
+                    FromSet(c) => go!(self: emit c),
+                    NotFromSet(b) => self.emit_chars(b),
+                }
+            },
+
+            //§ rawtext-state
+            states::RawData(Rawtext) => loop {
+                match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n')) {
+                    FromSet('\0') => go!(self: error; emit '\u{fffd}'),
+                    FromSet('<') => go!(self: to RawLessThanSign Rawtext),
+                    FromSet(c) => go!(self: emit c),
+                    NotFromSet(b) => self.emit_chars(b),
+                }
+            },
+
+            //§ script-data-state
+            states::RawData(ScriptData) => loop {
+                match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n')) {
+                    FromSet('\0') => go!(self: error; emit '\u{fffd}'),
+                    FromSet('<') => go!(self: to RawLessThanSign ScriptData),
+                    FromSet(c) => go!(self: emit c),
+                    NotFromSet(b) => self.emit_chars(b),
+                }
+            },
+
+            //§ script-data-escaped-state
+            states::RawData(ScriptDataEscaped(Escaped)) => loop {
+                match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n')) {
+                    FromSet('\0') => go!(self: error; emit '\u{fffd}'),
+                    FromSet('-') => go!(self: emit '-'; to ScriptDataEscapedDash Escaped),
+                    FromSet('<') => go!(self: to RawLessThanSign ScriptDataEscaped Escaped),
+                    FromSet(c) => go!(self: emit c),
+                    NotFromSet(b) => self.emit_chars(b),
+                }
+            },
+
+            //§ script-data-double-escaped-state
+            states::RawData(ScriptDataEscaped(DoubleEscaped)) => loop {
+                match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n')) {
+                    FromSet('\0') => go!(self: error; emit '\u{fffd}'),
+                    FromSet('-') => go!(self: emit '-'; to ScriptDataEscapedDash DoubleEscaped),
+                    FromSet('<') => {
+                        go!(self: emit '<'; to RawLessThanSign ScriptDataEscaped DoubleEscaped)
+                    },
+                    FromSet(c) => go!(self: emit c),
+                    NotFromSet(b) => self.emit_chars(b),
+                }
+            },
+
+            //§ plaintext-state
+            states::Plaintext => loop {
+                match pop_except_from!(self, input, small_char_set!('\r' '\0' '\n')) {
+                    FromSet('\0') => go!(self: error; emit '\u{fffd}'),
+                    FromSet(c) => go!(self: emit c),
+                    NotFromSet(b) => self.emit_chars(b),
+                }
+            },
+
+            //§ tag-open-state
+            states::TagOpen => loop {
+                match get_char!(self, input) {
+                    '!' => go!(self: clear_temp; to MarkupDeclarationOpen),
+                    '/' => go!(self: to EndTagOpen),
+                    '?' => go!(self: error; clear_comment; push_comment '?'; to BogusComment),
+                    c => match lower_ascii_letter(c) {
+                        Some(cl) => go!(self: create_tag StartTag cl; to TagName),
+                        None => go!(self: error; emit '<'; reconsume Data),
+                    },
+                }
+            },
+
+            //§ end-tag-open-state
+            states::EndTagOpen => loop {
+                match get_char!(self, input) {
+                    '>' => go!(self: error; to Data),
+                    '\0' => {
+                        go!(self: error; clear_comment; push_comment '\u{fffd}'; to BogusComment)
+                    },
+                    c => match lower_ascii_letter(c) {
+                        Some(cl) => go!(self: create_tag EndTag cl; to TagName),
+                        None => go!(self: error; clear_comment; push_comment c; to BogusComment),
+                    },
+                }
+            },
+
+            //§ tag-name-state
+            states::TagName => loop {
+                match get_char!(self, input) {
+                    '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName),
+                    '/' => go!(self: to SelfClosingStartTag),
+                    '>' => go!(self: emit_tag Data),
+                    '\0' => go!(self: error; push_tag '\u{fffd}'),
+                    c => go!(self: push_tag (c.to_ascii_lowercase())),
+                }
+            },
+
+            //§ script-data-escaped-less-than-sign-state
+            states::RawLessThanSign(ScriptDataEscaped(Escaped)) => loop {
+                match get_char!(self, input) {
+                    '/' => go!(self: clear_temp; to RawEndTagOpen ScriptDataEscaped Escaped),
+                    c => match lower_ascii_letter(c) {
+                        Some(cl) => go!(self: clear_temp; push_temp cl; emit '<'; emit c;
+                                    to ScriptDataEscapeStart DoubleEscaped),
+                        None => go!(self: emit '<'; reconsume RawData ScriptDataEscaped Escaped),
+                    },
+                }
+            },
+
+            //§ script-data-double-escaped-less-than-sign-state
+            states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => loop {
+                match get_char!(self, input) {
+                    '/' => go!(self: clear_temp; emit '/'; to ScriptDataDoubleEscapeEnd),
+                    _ => go!(self: reconsume RawData ScriptDataEscaped DoubleEscaped),
+                }
+            },
+
+            //§ rcdata-less-than-sign-state rawtext-less-than-sign-state script-data-less-than-sign-state
+            // Catch-all for the remaining kinds; the ScriptDataEscaped variants are matched above.
+            states::RawLessThanSign(kind) => loop {
+                match get_char!(self, input) {
+                    '/' => go!(self: clear_temp; to RawEndTagOpen kind),
+                    '!' if kind == ScriptData => {
+                        go!(self: emit '<'; emit '!'; to ScriptDataEscapeStart Escaped)
+                    },
+                    _ => go!(self: emit '<'; reconsume RawData kind),
+                }
+            },
+
+            //§ rcdata-end-tag-open-state rawtext-end-tag-open-state script-data-end-tag-open-state script-data-escaped-end-tag-open-state
+            states::RawEndTagOpen(kind) => loop {
+                let c = get_char!(self, input);
+                match lower_ascii_letter(c) {
+                    Some(cl) => go!(self: create_tag EndTag cl; push_temp c; to RawEndTagName kind),
+                    None => go!(self: emit '<'; emit '/'; reconsume RawData kind),
+                }
+            },
+
+            //§ rcdata-end-tag-name-state rawtext-end-tag-name-state script-data-end-tag-name-state script-data-escaped-end-tag-name-state
+            states::RawEndTagName(kind) => loop {
+                let c = get_char!(self, input);
+                if self.have_appropriate_end_tag() {
+                    match c {
+                        '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName),
+                        '/' => go!(self: to SelfClosingStartTag),
+                        '>' => go!(self: emit_tag Data),
+                        _ => (),
+                    }
+                }
+
+                match lower_ascii_letter(c) {
+                    Some(cl) => go!(self: push_tag cl; push_temp c),
+                    None => {
+                        go!(self: discard_tag; emit '<'; emit '/'; emit_temp; reconsume RawData kind)
+                    },
+                }
+            },
+
+            //§ script-data-double-escape-start-state
+            states::ScriptDataEscapeStart(DoubleEscaped) => loop {
+                let c = get_char!(self, input);
+                match c {
+                    '\t' | '\n' | '\x0C' | ' ' | '/' | '>' => {
+                        let esc = if &*self.temp_buf == "script" {
+                            DoubleEscaped
+                        } else {
+                            Escaped
+                        };
+                        go!(self: emit c; to RawData ScriptDataEscaped esc);
+                    },
+                    _ => match lower_ascii_letter(c) {
+                        Some(cl) => go!(self: push_temp cl; emit c),
+                        None => go!(self: reconsume RawData ScriptDataEscaped Escaped),
+                    },
+                }
+            },
+
+            //§ script-data-escape-start-state
+            states::ScriptDataEscapeStart(Escaped) => loop {
+                match get_char!(self, input) {
+                    '-' => go!(self: emit '-'; to ScriptDataEscapeStartDash),
+                    _ => go!(self: reconsume RawData ScriptData),
+                }
+            },
+
+            //§ script-data-escape-start-dash-state
+            states::ScriptDataEscapeStartDash => loop {
+                match get_char!(self, input) {
+                    '-' => go!(self: emit '-'; to ScriptDataEscapedDashDash Escaped),
+                    _ => go!(self: reconsume RawData ScriptData),
+                }
+            },
+
+            //§ script-data-escaped-dash-state script-data-double-escaped-dash-state
+            states::ScriptDataEscapedDash(kind) => loop {
+                match get_char!(self, input) {
+                    '-' => go!(self: emit '-'; to ScriptDataEscapedDashDash kind),
+                    '<' => {
+                        if kind == DoubleEscaped {
+                            go!(self: emit '<');
+                        }
+                        go!(self: to RawLessThanSign ScriptDataEscaped kind);
+                    },
+                    '\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind),
+                    c => go!(self: emit c; to RawData ScriptDataEscaped kind),
+                }
+            },
+
+            //§ script-data-escaped-dash-dash-state script-data-double-escaped-dash-dash-state
+            states::ScriptDataEscapedDashDash(kind) => loop {
+                match get_char!(self, input) {
+                    '-' => go!(self: emit '-'),
+                    '<' => {
+                        if kind == DoubleEscaped {
+                            go!(self: emit '<');
+                        }
+                        go!(self: to RawLessThanSign ScriptDataEscaped kind);
+                    },
+                    '>' => go!(self: emit '>'; to RawData ScriptData),
+                    '\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind),
+                    c => go!(self: emit c; to RawData ScriptDataEscaped kind),
+                }
+            },
+
+            //§ script-data-double-escape-end-state
+            states::ScriptDataDoubleEscapeEnd => loop {
+                let c = get_char!(self, input);
+                match c {
+                    '\t' | '\n' | '\x0C' | ' ' | '/' | '>' => {
+                        let esc = if &*self.temp_buf == "script" {
+                            Escaped
+                        } else {
+                            DoubleEscaped
+                        };
+                        go!(self: emit c; to RawData ScriptDataEscaped esc);
+                    },
+                    _ => match lower_ascii_letter(c) {
+                        Some(cl) => go!(self: push_temp cl; emit c),
+                        None => go!(self: reconsume RawData ScriptDataEscaped DoubleEscaped),
+                    },
+                }
+            },
+
+            //§ before-attribute-name-state
+            states::BeforeAttributeName => loop {
+                match get_char!(self, input) {
+                    '\t' | '\n' | '\x0C' | ' ' => (),
+                    '/' => go!(self: to SelfClosingStartTag),
+                    '>' => go!(self: emit_tag Data),
+                    '\0' => go!(self: error; create_attr '\u{fffd}'; to AttributeName),
+                    c => match lower_ascii_letter(c) {
+                        Some(cl) => go!(self: create_attr cl; to AttributeName),
+                        None => {
+                            go_match!(self: c,
+                            '"' , '\'' , '<' , '=' => error);
+                            go!(self: create_attr c; to AttributeName);
+                        },
+                    },
+                }
+            },
+
+            //§ attribute-name-state
+            states::AttributeName => loop {
+                match get_char!(self, input) {
+                    '\t' | '\n' | '\x0C' | ' ' => go!(self: to AfterAttributeName),
+                    '/' => go!(self: to SelfClosingStartTag),
+                    '=' => go!(self: to BeforeAttributeValue),
+                    '>' => go!(self: emit_tag Data),
+                    '\0' => go!(self: error; push_name '\u{fffd}'),
+                    c => match lower_ascii_letter(c) {
+                        Some(cl) => go!(self: push_name cl),
+                        None => {
+                            go_match!(self: c,
+                            '"' , '\'' , '<' => error);
+                            go!(self: push_name c);
+                        },
+                    },
+                }
+            },
+
+            //§ after-attribute-name-state
+            states::AfterAttributeName => loop {
+                match get_char!(self, input) {
+                    '\t' | '\n' | '\x0C' | ' ' => (),
+                    '/' => go!(self: to SelfClosingStartTag),
+                    '=' => go!(self: to BeforeAttributeValue),
+                    '>' => go!(self: emit_tag Data),
+                    '\0' => go!(self: error; create_attr '\u{fffd}'; to AttributeName),
+                    c => match lower_ascii_letter(c) {
+                        Some(cl) => go!(self: create_attr cl; to AttributeName),
+                        None => {
+                            go_match!(self: c,
+                            '"' , '\'' , '<' => error);
+                            go!(self: create_attr c; to AttributeName);
+                        },
+                    },
+                }
+            },
+
+            //§ before-attribute-value-state
+            // Use peek so we can handle the first attr character along with the rest,
+            // hopefully in the same zero-copy buffer.
+            states::BeforeAttributeValue => loop {
+                match peek!(self, input) {
+                    '\t' | '\n' | '\r' | '\x0C' | ' ' => go!(self: discard_char input),
+                    '"' => go!(self: discard_char input; to AttributeValue DoubleQuoted),
+                    '\'' => go!(self: discard_char input; to AttributeValue SingleQuoted),
+                    '\0' => {
+                        go!(self: discard_char input; error; push_value '\u{fffd}'; to AttributeValue Unquoted)
+                    },
+                    '>' => go!(self: discard_char input; error; emit_tag Data),
+                    _ => go!(self: to AttributeValue Unquoted),
+                }
+            },
+
+            //§ attribute-value-(double-quoted)-state
+            states::AttributeValue(DoubleQuoted) => loop {
+                match pop_except_from!(self, input, small_char_set!('\r' '"' '&' '\0' '\n')) {
+                    FromSet('"') => go!(self: to AfterAttributeValueQuoted),
+                    FromSet('&') => go!(self: consume_char_ref '"'),
+                    FromSet('\0') => go!(self: error; push_value '\u{fffd}'),
+                    FromSet(c) => go!(self: push_value c),
+                    NotFromSet(ref b) => go!(self: append_value b),
+                }
+            },
+
+            //§ attribute-value-(single-quoted)-state
+            states::AttributeValue(SingleQuoted) => loop {
+                match pop_except_from!(self, input, small_char_set!('\r' '\'' '&' '\0' '\n')) {
+                    FromSet('\'') => go!(self: to AfterAttributeValueQuoted),
+                    FromSet('&') => go!(self: consume_char_ref '\''),
+                    FromSet('\0') => go!(self: error; push_value '\u{fffd}'),
+                    FromSet(c) => go!(self: push_value c),
+                    NotFromSet(ref b) => go!(self: append_value b),
+                }
+            },
+
+            //§ attribute-value-(unquoted)-state
+            states::AttributeValue(Unquoted) => loop {
+                match pop_except_from!(
+                    self,
+                    input,
+                    small_char_set!('\r' '\t' '\n' '\x0C' ' ' '&' '>' '\0')
+                ) {
+                    FromSet('\t') | FromSet('\n') | FromSet('\x0C') | FromSet(' ') => {
+                        go!(self: to BeforeAttributeName)
+                    },
+                    FromSet('&') => go!(self: consume_char_ref '>'),
+                    FromSet('>') => go!(self: emit_tag Data),
+                    FromSet('\0') => go!(self: error; push_value '\u{fffd}'),
+                    FromSet(c) => {
+                        go_match!(self: c,
+                            '"' , '\'' , '<' , '=' , '`' => error);
+                        go!(self: push_value c);
+                    },
+                    NotFromSet(ref b) => go!(self: append_value b),
+                }
+            },
+
+            //§ after-attribute-value-(quoted)-state
+            states::AfterAttributeValueQuoted => loop {
+                match get_char!(self, input) {
+                    '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName),
+                    '/' => go!(self: to SelfClosingStartTag),
+                    '>' => go!(self: emit_tag Data),
+                    _ => go!(self: error; reconsume BeforeAttributeName),
+                }
+            },
+
+            //§ self-closing-start-tag-state
+            states::SelfClosingStartTag => loop {
+                match get_char!(self, input) {
+                    '>' => {
+                        self.current_tag_self_closing = true;
+                        go!(self: emit_tag Data);
+                    },
+                    _ => go!(self: error; reconsume BeforeAttributeName),
+                }
+            },
+
+            //§ comment-start-state
+            states::CommentStart => loop {
+                match get_char!(self, input) {
+                    '-' => go!(self: to CommentStartDash),
+                    '\0' => go!(self: error; push_comment '\u{fffd}'; to Comment),
+                    '>' => go!(self: error; emit_comment; to Data),
+                    c => go!(self: push_comment c; to Comment),
+                }
+            },
+
+            //§ comment-start-dash-state
+            states::CommentStartDash => loop {
+                match get_char!(self, input) {
+                    '-' => go!(self: to CommentEnd),
+                    '\0' => go!(self: error; append_comment "-\u{fffd}"; to Comment),
+                    '>' => go!(self: error; emit_comment; to Data),
+                    c => go!(self: push_comment '-'; push_comment c; to Comment),
+                }
+            },
+
+            //§ comment-state
+            states::Comment => loop {
+                match get_char!(self, input) {
+                    '-' => go!(self: to CommentEndDash),
+                    '\0' => go!(self: error; push_comment '\u{fffd}'),
+                    c => go!(self: push_comment c),
+                }
+            },
+
+            //§ comment-end-dash-state
+            states::CommentEndDash => loop {
+                match get_char!(self, input) {
+                    '-' => go!(self: to CommentEnd),
+                    '\0' => go!(self: error; append_comment "-\u{fffd}"; to Comment),
+                    c => go!(self: push_comment '-'; push_comment c; to Comment),
+                }
+            },
+
+            //§ comment-end-state
+            states::CommentEnd => loop {
+                match get_char!(self, input) {
+                    '>' => go!(self: emit_comment; to Data),
+                    '\0' => go!(self: error; append_comment "--\u{fffd}"; to Comment),
+                    '!' => go!(self: error; to CommentEndBang),
+                    '-' => go!(self: error; push_comment '-'),
+                    c => go!(self: error; append_comment "--"; push_comment c; to Comment),
+                }
+            },
+
+            //§ comment-end-bang-state
+            states::CommentEndBang => loop {
+                match get_char!(self, input) {
+                    '-' => go!(self: append_comment "--!"; to CommentEndDash),
+                    '>' => go!(self: emit_comment; to Data),
+                    '\0' => go!(self: error; append_comment "--!\u{fffd}"; to Comment),
+                    c => go!(self: append_comment "--!"; push_comment c; to Comment),
+                }
+            },
+
+            //§ doctype-state
+            states::Doctype => loop {
+                match get_char!(self, input) {
+                    '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeName),
+                    _ => go!(self: error; reconsume BeforeDoctypeName),
+                }
+            },
+
+            //§ before-doctype-name-state
+            states::BeforeDoctypeName => loop {
+                match get_char!(self, input) {
+                    '\t' | '\n' | '\x0C' | ' ' => (),
+                    '\0' => {
+                        go!(self: error; create_doctype; push_doctype_name '\u{fffd}'; to DoctypeName)
+                    },
+                    '>' => go!(self: error; create_doctype; force_quirks; emit_doctype; to Data),
+                    c => go!(self: create_doctype; push_doctype_name (c.to_ascii_lowercase());
+                                  to DoctypeName),
+                }
+            },
+
+            //§ doctype-name-state
+            states::DoctypeName => loop {
+                match get_char!(self, input) {
+                    '\t' | '\n' | '\x0C' | ' ' => go!(self: clear_temp; to AfterDoctypeName),
+                    '>' => go!(self: emit_doctype; to Data),
+                    '\0' => go!(self: error; push_doctype_name '\u{fffd}'),
+                    c => go!(self: push_doctype_name (c.to_ascii_lowercase())),
+                }
+            },
+
+            //§ after-doctype-name-state
+            states::AfterDoctypeName => loop {
+                if eat!(self, input, "public") {
+                    go!(self: to AfterDoctypeKeyword Public);
+                } else if eat!(self, input, "system") {
+                    go!(self: to AfterDoctypeKeyword System);
+                } else {
+                    match get_char!(self, input) {
+                        '\t' | '\n' | '\x0C' | ' ' => (),
+                        '>' => go!(self: emit_doctype; to Data),
+                        _ => go!(self: error; force_quirks; to BogusDoctype),
+                    }
+                }
+            },
+
+            //§ after-doctype-public-keyword-state after-doctype-system-keyword-state
+            states::AfterDoctypeKeyword(kind) => loop {
+                match get_char!(self, input) {
+                    '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeIdentifier kind),
+                    '"' => {
+                        go!(self: error; clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind)
+                    },
+                    '\'' => {
+                        go!(self: error; clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind)
+                    },
+                    '>' => go!(self: error; force_quirks; emit_doctype; to Data),
+                    _ => go!(self: error; force_quirks; to BogusDoctype),
+                }
+            },
+
+            //§ before-doctype-public-identifier-state before-doctype-system-identifier-state
+            states::BeforeDoctypeIdentifier(kind) => loop {
+                match get_char!(self, input) {
+                    '\t' | '\n' | '\x0C' | ' ' => (),
+                    '"' => go!(self: clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind),
+                    '\'' => go!(self: clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind),
+                    '>' => go!(self: error; force_quirks; emit_doctype; to Data),
+                    _ => go!(self: error; force_quirks; to BogusDoctype),
+                }
+            },
+
+            //§ doctype-public-identifier-(double-quoted)-state doctype-system-identifier-(double-quoted)-state
+            states::DoctypeIdentifierDoubleQuoted(kind) => loop {
+                match get_char!(self, input) {
+                    '"' => go!(self: to AfterDoctypeIdentifier kind),
+                    '\0' => go!(self: error; push_doctype_id kind '\u{fffd}'),
+                    '>' => go!(self: error; force_quirks; emit_doctype; to Data),
+                    c => go!(self: push_doctype_id kind c),
+                }
+            },
+
+            //§ doctype-public-identifier-(single-quoted)-state doctype-system-identifier-(single-quoted)-state
+            states::DoctypeIdentifierSingleQuoted(kind) => loop {
+                match get_char!(self, input) {
+                    '\'' => go!(self: to AfterDoctypeIdentifier kind),
+                    '\0' => go!(self: error; push_doctype_id kind '\u{fffd}'),
+                    '>' => go!(self: error; force_quirks; emit_doctype; to Data),
+                    c => go!(self: push_doctype_id kind c),
+                }
+            },
+
+            //§ after-doctype-public-identifier-state
+            states::AfterDoctypeIdentifier(Public) => loop {
+                match get_char!(self, input) {
+                    '\t' | '\n' | '\x0C' | ' ' => {
+                        go!(self: to BetweenDoctypePublicAndSystemIdentifiers)
+                    },
+                    '>' => go!(self: emit_doctype; to Data),
+                    '"' => {
+                        go!(self: error; clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System)
+                    },
+                    '\'' => {
+                        go!(self: error; clear_doctype_id System; to DoctypeIdentifierSingleQuoted System)
+                    },
+                    _ => go!(self: error; force_quirks; to BogusDoctype),
+                }
+            },
+
+            //§ after-doctype-system-identifier-state
+            states::AfterDoctypeIdentifier(System) => loop {
+                match get_char!(self, input) {
+                    '\t' | '\n' | '\x0C' | ' ' => (),
+                    '>' => go!(self: emit_doctype; to Data),
+                    _ => go!(self: error; to BogusDoctype),
+                }
+            },
+
+            //§ between-doctype-public-and-system-identifiers-state
+            states::BetweenDoctypePublicAndSystemIdentifiers => loop {
+                match get_char!(self, input) {
+                    '\t' | '\n' | '\x0C' | ' ' => (),
+                    '>' => go!(self: emit_doctype; to Data),
+                    '"' => {
+                        go!(self: clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System)
+                    },
+                    '\'' => {
+                        go!(self: clear_doctype_id System; to DoctypeIdentifierSingleQuoted System)
+                    },
+                    _ => go!(self: error; force_quirks; to BogusDoctype),
+                }
+            },
+
+            //§ bogus-doctype-state
+            states::BogusDoctype => loop {
+                match get_char!(self, input) {
+                    '>' => go!(self: emit_doctype; to Data),
+                    _ => (),
+                }
+            },
+
+            //§ bogus-comment-state
+            states::BogusComment => loop {
+                match get_char!(self, input) {
+                    '>' => go!(self: emit_comment; to Data),
+                    '\0' => go!(self: push_comment '\u{fffd}'),
+                    c => go!(self: push_comment c),
+                }
+            },
+
+            //§ markup-declaration-open-state
+            states::MarkupDeclarationOpen => loop {
+                if eat_exact!(self, input, "--") {
+                    go!(self: clear_comment; to CommentStart);
+                } else if eat!(self, input, "doctype") {
+                    go!(self: to Doctype);
+                } else {
+                    if self
+                        .sink
+                        .adjusted_current_node_present_but_not_in_html_namespace()
+                    {
+                        if eat_exact!(self, input, "[CDATA[") {
+                            go!(self: clear_temp; to CdataSection);
+                        }
+                    }
+                    go!(self: error; to BogusComment);
+                }
+            },
+
+            //§ cdata-section-state
+            states::CdataSection => loop {
+                match get_char!(self, input) {
+                    ']' => go!(self: to CdataSectionBracket),
+                    '\0' => go!(self: emit_temp; emit '\0'),
+                    c => go!(self: push_temp c),
+                }
+            },
+
+            //§ cdata-section-bracket-state
+            states::CdataSectionBracket => match get_char!(self, input) {
+                ']' => go!(self: to CdataSectionEnd),
+                _ => go!(self: push_temp ']'; reconsume CdataSection),
+            },
+
+            //§ cdata-section-end-state
+            states::CdataSectionEnd => loop {
+                match get_char!(self, input) {
+                    ']' => go!(self: push_temp ']'),
+                    '>' => go!(self: emit_temp; to Data),
+                    _ => go!(self: push_temp ']'; push_temp ']'; reconsume CdataSection),
+                }
+            },
+            //§ END
+        }
+    }
+
+    /// Advance the character-reference sub-tokenizer by one step.
+    ///
+    /// On `char_ref::Done` the sub-tokenizer's result is passed to
+    /// `process_char_ref` and the sub-tokenizer is not stored back. On
+    /// `char_ref::Stuck` or `char_ref::Progress` it is put back into
+    /// `self.char_ref_tokenizer`, and the call returns
+    /// `ProcessResult::Suspend` or `ProcessResult::Continue` respectively.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `self.char_ref_tokenizer` is `None`.
+    fn step_char_ref_tokenizer(&mut self, input: &mut BufferQueue) -> ProcessResult<Sink::Handle> {
+        // FIXME HACK: The sub-tokenizer is moved out of `self` while it runs
+        // so that we don't double-mut-borrow self.  This is why it's boxed.
+        let mut sub = self.char_ref_tokenizer.take().unwrap();
+        let status = sub.step(self, input);
+
+        match status {
+            char_ref::Done => {
+                // Finished: consume the result instead of restoring `sub`.
+                self.process_char_ref(sub.get_result());
+                ProcessResult::Continue
+            },
+            char_ref::Stuck => {
+                self.char_ref_tokenizer = Some(sub);
+                ProcessResult::Suspend
+            },
+            char_ref::Progress => {
+                self.char_ref_tokenizer = Some(sub);
+                ProcessResult::Continue
+            },
+        }
+    }
+
+    /// Deliver the characters of a finished character reference.
+    ///
+    /// If the result holds zero characters, a single `'&'` is delivered
+    /// instead. In the `Data` and `RawData(Rcdata)` states each character is
+    /// emitted; in any `AttributeValue` state it is pushed onto the current
+    /// attribute value.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the tokenizer is in any other state.
+    fn process_char_ref(&mut self, char_ref: CharRef) {
+        let CharRef {
+            chars: mut decoded,
+            num_chars: mut count,
+        } = char_ref;
+
+        // An empty result stands for a lone ampersand.
+        if count == 0 {
+            decoded[0] = '&';
+            count = 1;
+        }
+
+        for idx in 0..count {
+            let ch = decoded[idx as usize];
+            match self.state {
+                states::AttributeValue(_) => go!(self: push_value ch),
+
+                states::Data | states::RawData(states::Rcdata) => go!(self: emit ch),
+
+                _ => panic!(
+                    "state {:?} should not be reachable in process_char_ref",
+                    self.state
+                ),
+            }
+        }
+    }
+
+    /// Signal that the end of the input has been reached.
+    ///
+    /// Finishes any pending character reference, runs the tokenizer once more
+    /// with `at_eof` set, steps the per-state EOF handling until it suspends,
+    /// and then calls `end` on the sink. If `opts.profile` is set, the timing
+    /// profile is printed afterwards.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the final `run` does not return `TokenizerResult::Done`, or
+    /// if the temporary input queue is not empty afterwards.
+    pub fn end(&mut self) {
+        let mut input = BufferQueue::new();
+
+        // Handle EOF in the char ref sub-tokenizer, if there is one.
+        // This goes first because it might un-consume stuff into `input`.
+        if let Some(mut tok) = self.char_ref_tokenizer.take() {
+            tok.end_of_file(self, &mut input);
+            self.process_char_ref(tok.get_result());
+        }
+
+        // Process all remaining buffered input.
+        // Any lookahead we were waiting for is never going to arrive.
+        self.at_eof = true;
+        assert!(matches!(self.run(&mut input), TokenizerResult::Done));
+        assert!(input.is_empty());
+
+        // Keep applying the EOF step until it reports `Suspend`.
+        loop {
+            match self.eof_step() {
+                ProcessResult::Suspend => break,
+                ProcessResult::Continue => continue,
+                ProcessResult::Script(_) => unreachable!(),
+            }
+        }
+
+        self.sink.end();
+
+        if self.opts.profile {
+            self.dump_profile();
+        }
+    }
+
+    /// Print the tokenizer timing profile to stdout.
+    ///
+    /// Prints `self.time_in_sink`, then the sum of all entries in
+    /// `self.state_profile`, then one line per state (largest value first)
+    /// with its share of that sum as a percentage.
+    fn dump_profile(&self) {
+        let mut timings: Vec<(states::State, u64)> = self
+            .state_profile
+            .iter()
+            .map(|(state, spent)| (*state, *spent))
+            .collect();
+        // Descending by recorded value; the sort is stable, so ties keep
+        // the order in which the profile yielded them.
+        timings.sort_by(|lhs, rhs| rhs.1.cmp(&lhs.1));
+
+        let total: u64 = timings.iter().map(|entry| entry.1).sum();
+        println!("\nTokenizer profile, in nanoseconds");
+        println!("\n{:12}         total in token sink", self.time_in_sink);
+        println!("\n{:12}         total in tokenizer", total);
+
+        let total_f = total as f64;
+        for (state, spent) in timings {
+            let share = 100.0 * (spent as f64) / total_f;
+            println!("{:12}  {:4.1}%  {:?}", spent, share, state);
+        }
+    }
+
+    /// Handle end of input for the tokenizer's current state.
+    ///
+    /// Dispatches on `self.state` and runs the `go!` command list attached
+    /// to that state.  The caller invokes this repeatedly for as long as it
+    /// returns `ProcessResult::Continue`.
+    ///
+    /// NOTE(review): the meaning of the individual `go!` commands (`eof`,
+    /// `error_eof`, `emit`, `emit_temp`, `to`, ...) is defined by the macro
+    /// elsewhere in this file; confirm there before changing an arm.
+    fn eof_step(&mut self) -> ProcessResult<Sink::Handle> {
+        debug!("processing EOF in state {:?}", self.state);
+        match self.state {
+            // These states issue a bare `eof` command; no `error_eof`.
+            states::Data |
+            states::RawData(Rcdata) |
+            states::RawData(Rawtext) |
+            states::RawData(ScriptData) |
+            states::Plaintext => go!(self: eof),
+
+            // Inside a tag or escaped script data: `error_eof`, then back to
+            // `Data`, from where the next step is taken.
+            states::TagName |
+            states::RawData(ScriptDataEscaped(_)) |
+            states::BeforeAttributeName |
+            states::AttributeName |
+            states::AfterAttributeName |
+            states::BeforeAttributeValue |
+            states::AttributeValue(_) |
+            states::AfterAttributeValueQuoted |
+            states::SelfClosingStartTag |
+            states::ScriptDataEscapedDash(_) |
+            states::ScriptDataEscapedDashDash(_) => go!(self: error_eof; to Data),
+
+            // The characters that opened the unfinished tag are emitted
+            // before switching to `Data`.
+            states::TagOpen => go!(self: error_eof; emit '<'; to Data),
+
+            states::EndTagOpen => go!(self: error_eof; emit '<'; emit '/'; to Data),
+
+            // Must stay ahead of the generic `RawLessThanSign(kind)` arm:
+            // the double-escaped case emits nothing here.
+            states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => {
+                go!(self: to RawData ScriptDataEscaped DoubleEscaped)
+            },
+
+            states::RawLessThanSign(kind) => go!(self: emit '<'; to RawData kind),
+
+            states::RawEndTagOpen(kind) => go!(self: emit '<'; emit '/'; to RawData kind),
+
+            states::RawEndTagName(kind) => {
+                go!(self: emit '<'; emit '/'; emit_temp; to RawData kind)
+            },
+
+            states::ScriptDataEscapeStart(kind) => go!(self: to RawData ScriptDataEscaped kind),
+
+            states::ScriptDataEscapeStartDash => go!(self: to RawData ScriptData),
+
+            states::ScriptDataDoubleEscapeEnd => {
+                go!(self: to RawData ScriptDataEscaped DoubleEscaped)
+            },
+
+            // Unterminated comment: `error_eof`, then `emit_comment`.
+            states::CommentStart |
+            states::CommentStartDash |
+            states::Comment |
+            states::CommentEndDash |
+            states::CommentEnd |
+            states::CommentEndBang => go!(self: error_eof; emit_comment; to Data),
+
+            // These two states run `create_doctype` before emitting; the
+            // later doctype states below do not.
+            states::Doctype | states::BeforeDoctypeName => {
+                go!(self: error_eof; create_doctype; force_quirks; emit_doctype; to Data)
+            },
+
+            states::DoctypeName |
+            states::AfterDoctypeName |
+            states::AfterDoctypeKeyword(_) |
+            states::BeforeDoctypeIdentifier(_) |
+            states::DoctypeIdentifierDoubleQuoted(_) |
+            states::DoctypeIdentifierSingleQuoted(_) |
+            states::AfterDoctypeIdentifier(_) |
+            states::BetweenDoctypePublicAndSystemIdentifiers => {
+                go!(self: error_eof; force_quirks; emit_doctype; to Data)
+            },
+
+            // The bogus states emit what they have without `error_eof`.
+            states::BogusDoctype => go!(self: emit_doctype; to Data),
+
+            states::BogusComment => go!(self: emit_comment; to Data),
+
+            states::MarkupDeclarationOpen => go!(self: error; to BogusComment),
+
+            states::CdataSection => go!(self: emit_temp; error_eof; to Data),
+
+            // Pending ']' characters are pushed with `push_temp` before
+            // returning to `CdataSection`, whose arm then runs `emit_temp`.
+            states::CdataSectionBracket => go!(self: push_temp ']'; to CdataSection),
+
+            states::CdataSectionEnd => go!(self: push_temp ']'; push_temp ']'; to CdataSection),
+        }
+    }
+}
+
+#[cfg(test)]
+#[allow(non_snake_case)]
+mod test {
+    use super::option_push; // private items
+    use crate::tendril::{SliceExt, StrTendril};
+
+    use super::{TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts};
+
+    use super::interface::{CharacterTokens, EOFToken, NullCharacterToken, ParseError};
+    use super::interface::{EndTag, StartTag, Tag, TagKind};
+    use super::interface::{TagToken, Token};
+
+    use markup5ever::buffer_queue::BufferQueue;
+    use std::mem::replace;
+
+    use crate::LocalName;
+
+    // LinesMatch implements the TokenSink trait. It is used for testing to see
+    // if current_line is being updated when process_token is called. The lines
+    // vector is a collection of the line numbers that each token is on.
+    struct LinesMatch {
+        // Character tokens flushed out of `current_str` by `finish_str`.
+        tokens: Vec<Token>,
+        // Text accumulated from character and NUL tokens since the last flush.
+        current_str: StrTendril,
+        // Every token handed to `push`, paired with the line number it was
+        // reported on.
+        lines: Vec<(Token, u64)>,
+    }
+
+    impl LinesMatch {
+        // Start with nothing recorded and no pending text.
+        fn new() -> LinesMatch {
+            LinesMatch {
+                tokens: Vec::new(),
+                current_str: StrTendril::new(),
+                lines: Vec::new(),
+            }
+        }
+
+        // Flush pending text first, then record `token` with its line number.
+        fn push(&mut self, token: Token, line_number: u64) {
+            self.finish_str();
+            self.lines.push((token, line_number));
+        }
+
+        // Move any accumulated text into `tokens` as a single
+        // `CharacterTokens`, leaving `current_str` empty.
+        fn finish_str(&mut self) {
+            if self.current_str.len() == 0 {
+                return;
+            }
+            let pending = replace(&mut self.current_str, StrTendril::new());
+            self.tokens.push(CharacterTokens(pending));
+        }
+    }
+
+    impl TokenSink for LinesMatch {
+        type Handle = ();
+
+        // Record each token together with the line number the tokenizer
+        // reported for it.  Text is buffered, parse errors abort the test,
+        // and the EOF token is dropped.
+        fn process_token(
+            &mut self,
+            token: Token,
+            line_number: u64,
+        ) -> TokenSinkResult<Self::Handle> {
+            match token {
+                CharacterTokens(text) => {
+                    self.current_str.push_slice(&text);
+                },
+
+                NullCharacterToken => {
+                    self.current_str.push_char('\0');
+                },
+
+                EOFToken => {},
+
+                ParseError(_) => {
+                    panic!("unexpected parse error");
+                },
+
+                TagToken(mut tag) => {
+                    // The spec seems to indicate that one can emit
+                    // erroneous end tags with attrs, but the test
+                    // cases don't contain them.
+                    if let EndTag = tag.kind {
+                        tag.self_closing = false;
+                        tag.attrs = vec![];
+                    } else {
+                        tag.attrs.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name));
+                    }
+                    self.push(TagToken(tag), line_number);
+                },
+
+                other => self.push(other, line_number),
+            }
+            TokenSinkResult::Continue
+        }
+    }
+
+    // Feed every chunk of `input` through a tokenizer backed by a
+    // `LinesMatch` sink and return the (token, line number) pairs it recorded.
+    fn tokenize(input: Vec<StrTendril>, opts: TokenizerOpts) -> Vec<(Token, u64)> {
+        let mut tokenizer = Tokenizer::new(LinesMatch::new(), opts);
+        let mut queue = BufferQueue::new();
+        for chunk in input {
+            queue.push_back(chunk);
+            let _ = tokenizer.feed(&mut queue);
+        }
+        tokenizer.end();
+        tokenizer.sink.lines
+    }
+
+    // Build a `TagToken` of the given kind named after `token`, with no
+    // attributes and the self-closing flag cleared.
+    fn create_tag(token: StrTendril, tagkind: TagKind) -> Token {
+        TagToken(Tag {
+            kind: tagkind,
+            name: LocalName::from(&*token),
+            self_closing: false,
+            attrs: Vec::new(),
+        })
+    }
+
+    // Pushing onto `None` must create a tendril holding just that char.
+    #[test]
+    fn push_to_None_gives_singleton() {
+        let mut target: Option<StrTendril> = None;
+        option_push(&mut target, 'x');
+        let expected = Some("x".to_tendril());
+        assert_eq!(target, expected);
+    }
+
+    // Pushing onto an existing but empty tendril appends the char.
+    #[test]
+    fn push_to_empty_appends() {
+        let mut target: Option<StrTendril> = Some(StrTendril::new());
+        option_push(&mut target, 'x');
+        let expected = Some("x".to_tendril());
+        assert_eq!(target, expected);
+    }
+
+    // Pushing onto a non-empty tendril keeps the old content in front.
+    #[test]
+    fn push_to_nonempty_appends() {
+        let mut target: Option<StrTendril> = Some(StrTendril::from_slice("y"));
+        option_push(&mut target, 'x');
+        let expected = Some("yx".to_tendril());
+        assert_eq!(target, expected);
+    }
+
+    // Four tags separated by "\n", one per chunk: each must be reported on
+    // its own line, 1 through 4.
+    #[test]
+    fn check_lines() {
+        let opts = TokenizerOpts {
+            exact_errors: false,
+            discard_bom: true,
+            profile: false,
+            initial_state: None,
+            last_start_tag_name: None,
+        };
+        let chunks: Vec<StrTendril> = ["<a>\n", "<b>\n", "</b>\n", "</a>\n"]
+            .iter()
+            .map(|text| StrTendril::from(*text))
+            .collect();
+        let tags = vec![
+            ("a", StartTag, 1),
+            ("b", StartTag, 2),
+            ("b", EndTag, 3),
+            ("a", EndTag, 4),
+        ];
+        let expected: Vec<(Token, u64)> = tags
+            .into_iter()
+            .map(|(name, kind, line)| (create_tag(StrTendril::from(name), kind), line))
+            .collect();
+        let results = tokenize(chunks, opts);
+        assert_eq!(results, expected);
+    }
+
+    // Same as `check_lines`, but with "\r\n" line endings: every CRLF pair
+    // must advance the line number by exactly one.
+    #[test]
+    fn check_lines_with_new_line() {
+        let opts = TokenizerOpts {
+            exact_errors: false,
+            discard_bom: true,
+            profile: false,
+            initial_state: None,
+            last_start_tag_name: None,
+        };
+        let chunks: Vec<StrTendril> = ["<a>\r\n", "<b>\r\n", "</b>\r\n", "</a>\r\n"]
+            .iter()
+            .map(|text| StrTendril::from(*text))
+            .collect();
+        let tags = vec![
+            ("a", StartTag, 1),
+            ("b", StartTag, 2),
+            ("b", EndTag, 3),
+            ("a", EndTag, 4),
+        ];
+        let expected: Vec<(Token, u64)> = tags
+            .into_iter()
+            .map(|(name, kind, line)| (create_tag(StrTendril::from(name), kind), line))
+            .collect();
+        let results = tokenize(chunks, opts);
+        assert_eq!(results, expected);
+    }
+}
diff --git a/src/tokenizer/states.rs b/src/tokenizer/states.rs
new file mode 100644
index 0000000..d455e9a
--- /dev/null
+++ b/src/tokenizer/states.rs
@@ -0,0 +1,93 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Tokenizer states.
+//!
+//! This is public for use by the tokenizer tests.  Other library
+//! users should not have to care about this.
+
+pub use self::AttrValueKind::*;
+pub use self::DoctypeIdKind::*;
+pub use self::RawKind::*;
+pub use self::ScriptEscapeKind::*;
+pub use self::State::*;
+
+/// Selects between the escaped and double-escaped flavours of the
+/// script-data states.
+///
+/// Carried by `RawKind::ScriptDataEscaped` and by the
+/// `ScriptDataEscapeStart`, `ScriptDataEscapedDash` and
+/// `ScriptDataEscapedDashDash` states.
+#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
+pub enum ScriptEscapeKind {
+    Escaped,
+    DoubleEscaped,
+}
+
+/// Which DOCTYPE identifier a doctype-identifier state is working on.
+///
+/// Carried by the `AfterDoctypeKeyword`, `BeforeDoctypeIdentifier`,
+/// `DoctypeIdentifier*Quoted` and `AfterDoctypeIdentifier` states.
+#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
+pub enum DoctypeIdKind {
+    Public,
+    System,
+}
+
+/// The flavour of raw text handled by the `RawData`, `RawLessThanSign`,
+/// `RawEndTagOpen` and `RawEndTagName` states.
+#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
+pub enum RawKind {
+    Rcdata,
+    Rawtext,
+    ScriptData,
+    /// Script data inside an escape; the payload says whether it is
+    /// escaped or double-escaped.
+    ScriptDataEscaped(ScriptEscapeKind),
+}
+
+/// How the attribute value handled by the `AttributeValue` state is
+/// delimited.
+#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
+pub enum AttrValueKind {
+    Unquoted,
+    SingleQuoted,
+    DoubleQuoted,
+}
+
+/// A tokenizer state.
+///
+/// Several families of closely related states are folded into a single
+/// variant that carries a kind (`RawKind`, `ScriptEscapeKind`,
+/// `AttrValueKind`, `DoctypeIdKind`).
+///
+/// `PartialOrd`/`Ord` are derived, so the declaration order of the variants
+/// below defines how states compare; keep that in mind before reordering.
+#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
+pub enum State {
+    Data,
+    Plaintext,
+    TagOpen,
+    EndTagOpen,
+    TagName,
+    // Raw text states, parameterised by the flavour of raw text.
+    RawData(RawKind),
+    RawLessThanSign(RawKind),
+    RawEndTagOpen(RawKind),
+    RawEndTagName(RawKind),
+    // Script data escape handling.
+    ScriptDataEscapeStart(ScriptEscapeKind),
+    ScriptDataEscapeStartDash,
+    ScriptDataEscapedDash(ScriptEscapeKind),
+    ScriptDataEscapedDashDash(ScriptEscapeKind),
+    ScriptDataDoubleEscapeEnd,
+    // Attributes.
+    BeforeAttributeName,
+    AttributeName,
+    AfterAttributeName,
+    BeforeAttributeValue,
+    AttributeValue(AttrValueKind),
+    AfterAttributeValueQuoted,
+    SelfClosingStartTag,
+    // Comments and markup declarations.
+    BogusComment,
+    MarkupDeclarationOpen,
+    CommentStart,
+    CommentStartDash,
+    Comment,
+    CommentEndDash,
+    CommentEnd,
+    CommentEndBang,
+    // DOCTYPE; identifier states carry which identifier is meant.
+    Doctype,
+    BeforeDoctypeName,
+    DoctypeName,
+    AfterDoctypeName,
+    AfterDoctypeKeyword(DoctypeIdKind),
+    BeforeDoctypeIdentifier(DoctypeIdKind),
+    DoctypeIdentifierDoubleQuoted(DoctypeIdKind),
+    DoctypeIdentifierSingleQuoted(DoctypeIdKind),
+    AfterDoctypeIdentifier(DoctypeIdKind),
+    BetweenDoctypePublicAndSystemIdentifiers,
+    BogusDoctype,
+    // CDATA sections.
+    CdataSection,
+    CdataSectionBracket,
+    CdataSectionEnd,
+}
diff --git a/src/tree_builder/data.rs b/src/tree_builder/data.rs
new file mode 100644
index 0000000..9d51a71
--- /dev/null
+++ b/src/tree_builder/data.rs
@@ -0,0 +1,171 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};
+use crate::tendril::StrTendril;
+use crate::tokenizer::Doctype;
+
+/// Public-identifier prefixes that select full quirks mode.
+///
+/// Every entry must be lowercase: the public identifier is ASCII-lowercased
+/// before it is tested against these prefixes, which is what makes the
+/// comparison ASCII-case-insensitive.
+static QUIRKY_PUBLIC_PREFIXES: &[&str] = &[
+    "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+    "-//as//dtd html 3.0 aswedit + extensions//",
+    "-//ietf//dtd html 2.0 level 1//",
+    "-//ietf//dtd html 2.0 level 2//",
+    "-//ietf//dtd html 2.0 strict level 1//",
+    "-//ietf//dtd html 2.0 strict level 2//",
+    "-//ietf//dtd html 2.0 strict//",
+    "-//ietf//dtd html 2.0//",
+    "-//ietf//dtd html 2.1e//",
+    "-//ietf//dtd html 3.0//",
+    "-//ietf//dtd html 3.2 final//",
+    "-//ietf//dtd html 3.2//",
+    "-//ietf//dtd html 3//",
+    "-//ietf//dtd html level 0//",
+    "-//ietf//dtd html level 1//",
+    "-//ietf//dtd html level 2//",
+    "-//ietf//dtd html level 3//",
+    "-//ietf//dtd html strict level 0//",
+    "-//ietf//dtd html strict level 1//",
+    "-//ietf//dtd html strict level 2//",
+    "-//ietf//dtd html strict level 3//",
+    "-//ietf//dtd html strict//",
+    "-//ietf//dtd html//",
+    "-//metrius//dtd metrius presentational//",
+    "-//microsoft//dtd internet explorer 2.0 html strict//",
+    "-//microsoft//dtd internet explorer 2.0 html//",
+    "-//microsoft//dtd internet explorer 2.0 tables//",
+    "-//microsoft//dtd internet explorer 3.0 html strict//",
+    "-//microsoft//dtd internet explorer 3.0 html//",
+    "-//microsoft//dtd internet explorer 3.0 tables//",
+    "-//netscape comm. corp.//dtd html//",
+    "-//netscape comm. corp.//dtd strict html//",
+    "-//o'reilly and associates//dtd html 2.0//",
+    "-//o'reilly and associates//dtd html extended 1.0//",
+    "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+    "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+    "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+    "-//spyglass//dtd html 2.0 extended//",
+    "-//sq//dtd html 2.0 hotmetal + extensions//",
+    "-//sun microsystems corp.//dtd hotjava html//",
+    "-//sun microsystems corp.//dtd hotjava strict html//",
+    "-//w3c//dtd html 3 1995-03-24//",
+    "-//w3c//dtd html 3.2 draft//",
+    "-//w3c//dtd html 3.2 final//",
+    "-//w3c//dtd html 3.2//",
+    "-//w3c//dtd html 3.2s draft//",
+    "-//w3c//dtd html 4.0 frameset//",
+    "-//w3c//dtd html 4.0 transitional//",
+    "-//w3c//dtd html experimental 19960712//",
+    "-//w3c//dtd html experimental 970421//",
+    "-//w3c//dtd w3 html//",
+    "-//w3o//dtd w3 html 3.0//",
+    "-//webtechs//dtd mozilla html 2.0//",
+    "-//webtechs//dtd mozilla html//",
+];
+
+/// Lowercased public identifiers that select full quirks mode when the
+/// ASCII-lowercased public identifier equals one of them exactly.
+static QUIRKY_PUBLIC_MATCHES: &[&str] = &[
+    "-//w3o//dtd w3 html strict 3.0//en//",
+    "-/w3c/dtd html 4.0 transitional/en",
+    "html",
+];
+
+/// Lowercased system identifiers that select full quirks mode when the
+/// ASCII-lowercased system identifier equals one of them exactly.
+static QUIRKY_SYSTEM_MATCHES: &[&str] =
+    &["http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"];
+
+/// Lowercased public-identifier prefixes that select limited-quirks mode.
+static LIMITED_QUIRKY_PUBLIC_PREFIXES: &[&str] = &[
+    "-//w3c//dtd xhtml 1.0 frameset//",
+    "-//w3c//dtd xhtml 1.0 transitional//",
+];
+
+/// Lowercased HTML 4.01 public-identifier prefixes whose quirks mode depends
+/// on the system identifier: full quirks when it is missing, limited quirks
+/// when it is present.
+static HTML4_PUBLIC_PREFIXES: &[&str] = &[
+    "-//w3c//dtd html 4.01 frameset//",
+    "-//w3c//dtd html 4.01 transitional//",
+];
+
+pub fn doctype_error_and_quirks(doctype: &Doctype, iframe_srcdoc: bool) -> (bool, QuirksMode) {
+    fn opt_string_as_slice<'t>(x: &'t Option<String>) -> Option<&'t str> {
+        x.as_ref().map(|y| &y[..])
+    }
+
+    fn opt_tendril_as_slice<'t>(x: &'t Option<StrTendril>) -> Option<&'t str> {
+        match *x {
+            Some(ref t) => Some(t),
+            None => None,
+        }
+    }
+
+    fn opt_to_ascii_lower(x: Option<&str>) -> Option<String> {
+        x.map(|y| y.to_ascii_lowercase())
+    }
+
+    let name = opt_tendril_as_slice(&doctype.name);
+    let public = opt_tendril_as_slice(&doctype.public_id);
+    let system = opt_tendril_as_slice(&doctype.system_id);
+
+    let err = match (name, public, system) {
+        (Some("html"), None, None) |
+        (Some("html"), None, Some("about:legacy-compat")) |
+        (Some("html"), Some("-//W3C//DTD HTML 4.0//EN"), None) |
+        (
+            Some("html"),
+            Some("-//W3C//DTD HTML 4.0//EN"),
+            Some("http://www.w3.org/TR/REC-html40/strict.dtd"),
+        ) |
+        (Some("html"), Some("-//W3C//DTD HTML 4.01//EN"), None) |
+        (
+            Some("html"),
+            Some("-//W3C//DTD HTML 4.01//EN"),
+            Some("http://www.w3.org/TR/html4/strict.dtd"),
+        ) |
+        (
+            Some("html"),
+            Some("-//W3C//DTD XHTML 1.0 Strict//EN"),
+            Some("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"),
+        ) |
+        (
+            Some("html"),
+            Some("-//W3C//DTD XHTML 1.1//EN"),
+            Some("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"),
+        ) => false,
+
+        _ => true,
+    };
+
+    // FIXME: We could do something asymptotically faster here.
+    // But there aren't many strings, and this happens at most once per parse.
+    fn contains_pfx(haystack: &[&str], needle: &str) -> bool {
+        haystack.iter().any(|&x| needle.starts_with(x))
+    }
+
+    // Quirks-mode matches are case-insensitive.
+    let public = opt_to_ascii_lower(public);
+    let system = opt_to_ascii_lower(system);
+
+    let quirk = match (opt_string_as_slice(&public), opt_string_as_slice(&system)) {
+        _ if doctype.force_quirks => Quirks,
+        _ if name != Some("html") => Quirks,
+
+        _ if iframe_srcdoc => NoQuirks,
+
+        (Some(ref p), _) if QUIRKY_PUBLIC_MATCHES.contains(p) => Quirks,
+        (_, Some(ref s)) if QUIRKY_SYSTEM_MATCHES.contains(s) => Quirks,
+
+        (Some(p), _) if contains_pfx(QUIRKY_PUBLIC_PREFIXES, p) => Quirks,
+        (Some(p), _) if contains_pfx(LIMITED_QUIRKY_PUBLIC_PREFIXES, p) => LimitedQuirks,
+
+        (Some(p), s) if contains_pfx(HTML4_PUBLIC_PREFIXES, p) => match s {
+            None => Quirks,
+            Some(_) => LimitedQuirks,
+        },
+
+        _ => NoQuirks,
+    };
+
+    (err, quirk)
+}
diff --git a/src/tree_builder/mod.rs b/src/tree_builder/mod.rs
new file mode 100644
index 0000000..a6fa8bf
--- /dev/null
+++ b/src/tree_builder/mod.rs
@@ -0,0 +1,1681 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![allow(warnings)]
+
+//! The HTML5 tree builder.
+
+pub use crate::interface::{create_element, ElementFlags, NextParserState, Tracer, TreeSink};
+pub use crate::interface::{AppendNode, AppendText, Attribute, NodeOrText};
+pub use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};
+
+use self::types::*;
+
+use crate::tendril::StrTendril;
+use crate::{ExpandedName, LocalName, Namespace, QualName};
+
+use crate::tokenizer;
+use crate::tokenizer::states as tok_state;
+use crate::tokenizer::{Doctype, EndTag, StartTag, Tag, TokenSink, TokenSinkResult};
+
+use crate::util::str::is_ascii_whitespace;
+
+use std::borrow::Cow::Borrowed;
+use std::collections::VecDeque;
+use std::default::Default;
+use std::iter::{Enumerate, Rev};
+use std::mem::replace;
+use std::{fmt, slice};
+
+use crate::tokenizer::states::{RawData, RawKind};
+use crate::tree_builder::tag_sets::*;
+use crate::tree_builder::types::*;
+use crate::util::str::to_escaped_string;
+use log::{debug, log_enabled, warn, Level};
+use mac::{_tt_as_expr_hack, format_if, matches};
+
+pub use self::PushFlag::*;
+
+#[macro_use]
+mod tag_sets;
+
+mod data;
+mod types;
+
+include!(concat!(env!("OUT_DIR"), "/rules.rs"));
+
+/// Tree builder options, with an impl for Default.
+#[derive(Copy, Clone)]
+pub struct TreeBuilderOpts {
+    /// Report all parse errors described in the spec, at some
+    /// performance penalty?  Default: false
+    pub exact_errors: bool,
+
+    /// Is scripting enabled?  Default: true
+    pub scripting_enabled: bool,
+
+    /// Is this an `iframe srcdoc` document?  Default: false
+    pub iframe_srcdoc: bool,
+
+    /// Should we drop the DOCTYPE (if any) from the tree?  Default: false
+    pub drop_doctype: bool,
+
+    /// Obsolete, ignored.  Default: false
+    pub ignore_missing_rules: bool,
+
+    /// Initial TreeBuilder quirks mode. Default: NoQuirks
+    pub quirks_mode: QuirksMode,
+}
+
+impl Default for TreeBuilderOpts {
+    /// Default options: scripting enabled, `NoQuirks` as the initial quirks
+    /// mode, and every other flag switched off.
+    fn default() -> Self {
+        Self {
+            scripting_enabled: true,
+            quirks_mode: NoQuirks,
+            exact_errors: false,
+            iframe_srcdoc: false,
+            drop_doctype: false,
+            ignore_missing_rules: false,
+        }
+    }
+}
+
+/// The HTML tree builder.
+///
+/// `Handle` is the node handle type of the sink (the impls below require
+/// `Sink: TreeSink<Handle = Handle>`); `Sink` receives the tree
+/// modifications.
+pub struct TreeBuilder<Handle, Sink> {
+    /// Options controlling the behavior of the tree builder.
+    opts: TreeBuilderOpts,
+
+    /// Consumer of tree modifications.
+    pub sink: Sink,
+
+    /// Insertion mode.
+    mode: InsertionMode,
+
+    /// Original insertion mode, used by Text and InTableText modes.
+    orig_mode: Option<InsertionMode>,
+
+    /// Stack of template insertion modes.
+    template_modes: Vec<InsertionMode>,
+
+    /// Pending table character tokens.
+    pending_table_text: Vec<(SplitStatus, StrTendril)>,
+
+    /// Quirks mode as set by the parser.
+    /// Starts out as `TreeBuilderOpts::quirks_mode`.
+    /// FIXME: can scripts etc. change this?
+    quirks_mode: QuirksMode,
+
+    /// The document node, which is created by the sink.
+    doc_handle: Handle,
+
+    /// Stack of open elements, most recently added at end.
+    open_elems: Vec<Handle>,
+
+    /// List of active formatting elements.
+    active_formatting: Vec<FormatEntry<Handle>>,
+
+    //§ the-element-pointers
+    /// Head element pointer.
+    head_elem: Option<Handle>,
+
+    /// Form element pointer.
+    form_elem: Option<Handle>,
+    //§ END
+    /// Frameset-ok flag.
+    frameset_ok: bool,
+
+    /// Ignore a following U+000A LINE FEED?
+    ignore_lf: bool,
+
+    /// Is foster parenting enabled?
+    foster_parenting: bool,
+
+    /// The context element for the fragment parsing algorithm.
+    /// `None` when parsing a whole document.
+    context_elem: Option<Handle>,
+
+    /// Track current line
+    current_line: u64,
+    // WARNING: If you add new fields that contain Handles, you
+    // must add them to trace_handles() below to preserve memory
+    // safety!
+    //
+    // FIXME: Auto-generate the trace hooks like Servo does.
+}
+
+impl<Handle, Sink> TreeBuilder<Handle, Sink>
+where
+    Handle: Clone,
+    Sink: TreeSink<Handle = Handle>,
+{
+    /// Build a tree builder for a whole document that forwards its tree
+    /// modifications to `sink`.
+    ///
+    /// The document handle is requested from the sink up front, and the
+    /// builder starts in the `Initial` insertion mode on line 1.
+    ///
+    /// The tree builder is also a `TokenSink`.
+    pub fn new(mut sink: Sink, opts: TreeBuilderOpts) -> TreeBuilder<Handle, Sink> {
+        let doc_handle = sink.get_document();
+        let quirks_mode = opts.quirks_mode;
+        TreeBuilder {
+            opts,
+            sink,
+            mode: Initial,
+            orig_mode: None,
+            template_modes: Vec::new(),
+            pending_table_text: Vec::new(),
+            quirks_mode,
+            doc_handle,
+            open_elems: Vec::new(),
+            active_formatting: Vec::new(),
+            head_elem: None,
+            form_elem: None,
+            frameset_ok: true,
+            ignore_lf: false,
+            foster_parenting: false,
+            context_elem: None,
+            current_line: 1,
+        }
+    }
+
+    /// Build a tree builder for fragment parsing that forwards its tree
+    /// modifications to `sink`.
+    ///
+    /// `context_elem` is the fragment's context element and `form_elem` the
+    /// initial form element pointer.  When the context element is an HTML
+    /// `template`, the stack of template insertion modes starts out holding
+    /// `InTemplate`.
+    ///
+    /// The tree builder is also a `TokenSink`.
+    pub fn new_for_fragment(
+        mut sink: Sink,
+        context_elem: Handle,
+        form_elem: Option<Handle>,
+        opts: TreeBuilderOpts,
+    ) -> TreeBuilder<Handle, Sink> {
+        let doc_handle = sink.get_document();
+        let context_is_template = sink.elem_name(&context_elem) == expanded_name!(html "template");
+        let template_modes = if context_is_template {
+            vec![InTemplate]
+        } else {
+            Vec::new()
+        };
+        let quirks_mode = opts.quirks_mode;
+
+        let mut builder = TreeBuilder {
+            opts,
+            sink,
+            mode: Initial,
+            orig_mode: None,
+            template_modes,
+            pending_table_text: Vec::new(),
+            quirks_mode,
+            doc_handle,
+            open_elems: Vec::new(),
+            active_formatting: Vec::new(),
+            head_elem: None,
+            form_elem,
+            frameset_ok: true,
+            ignore_lf: false,
+            foster_parenting: false,
+            context_elem: Some(context_elem),
+            current_line: 1,
+        };
+
+        // https://html.spec.whatwg.org/multipage/#parsing-html-fragments
+        // 5. Let root be a new html element with no attributes.
+        // 6. Append the element root to the Document node created above.
+        // 7. Set up the parser's stack of open elements so that it contains just the single element root.
+        builder.create_root(vec![]);
+        // 10. Reset the parser's insertion mode appropriately.
+        builder.mode = builder.reset_insertion_mode();
+
+        builder
+    }
+
+    // https://html.spec.whatwg.org/multipage/#concept-frag-parse-context
+    // Step 4. Set the state of the HTML parser's tokenization stage as follows:
+    //
+    // Picks the initial tokenizer state from the fragment's context element.
+    // Panics with "no context element" when there is none.  A context
+    // element outside the HTML namespace always yields `Data`.
+    pub fn tokenizer_state_for_context_elem(&self) -> tok_state::State {
+        let context = self.context_elem.as_ref().expect("no context element");
+        let local = match self.sink.elem_name(context) {
+            ExpandedName {
+                ns: &ns!(html),
+                local,
+            } => local,
+            _ => return tok_state::Data,
+        };
+
+        // Work out the raw-text flavour; names that do not use a raw-data
+        // state return directly.
+        let raw_kind = match *local {
+            local_name!("title") | local_name!("textarea") => tok_state::Rcdata,
+
+            local_name!("style") |
+            local_name!("xmp") |
+            local_name!("iframe") |
+            local_name!("noembed") |
+            local_name!("noframes") => tok_state::Rawtext,
+
+            local_name!("script") => tok_state::ScriptData,
+
+            // With scripting disabled, `noscript` falls through to `Data`.
+            local_name!("noscript") if self.opts.scripting_enabled => tok_state::Rawtext,
+
+            local_name!("plaintext") => return tok_state::Plaintext,
+
+            _ => return tok_state::Data,
+        };
+        tok_state::RawData(raw_kind)
+    }
+
+    /// Call the `Tracer`'s `trace_handle` method on every `Handle` in the tree builder's
+    /// internal state.  This is intended to support garbage-collected DOMs.
+    ///
+    /// Covers the document handle, the stack of open elements, the element
+    /// entries of the active formatting list, and the head, form and
+    /// context element pointers.
+    pub fn trace_handles(&self, tracer: &dyn Tracer<Handle = Handle>) {
+        tracer.trace_handle(&self.doc_handle);
+        for elem in &self.open_elems {
+            tracer.trace_handle(elem);
+        }
+        for entry in &self.active_formatting {
+            // Only element entries hold a handle.
+            if let Element(ref handle, _) = *entry {
+                tracer.trace_handle(handle);
+            }
+        }
+        if let Some(ref handle) = self.head_elem {
+            tracer.trace_handle(handle);
+        }
+        if let Some(ref handle) = self.form_elem {
+            tracer.trace_handle(handle);
+        }
+        if let Some(ref handle) = self.context_elem {
+            tracer.trace_handle(handle);
+        }
+    }
+
+    /// Debugging aid: print the open-element stack and the active
+    /// formatting list to stdout under `label`.
+    ///
+    /// Panics if either list contains an element outside the HTML namespace.
+    #[allow(dead_code)]
+    fn dump_state(&self, label: String) {
+        // Print the local name of an HTML element; any other namespace panics.
+        let print_html_name = |handle: &Handle| {
+            let name = self.sink.elem_name(handle);
+            match *name.ns {
+                ns!(html) => print!(" {}", name.local),
+                _ => panic!(),
+            }
+        };
+
+        println!("dump_state on {}", label);
+
+        print!("    open_elems:");
+        for node in &self.open_elems {
+            print_html_name(node);
+        }
+        println!("");
+
+        print!("    active_formatting:");
+        for entry in &self.active_formatting {
+            match *entry {
+                Marker => print!(" Marker"),
+                Element(ref handle, _) => print_html_name(handle),
+            }
+        }
+        println!("");
+    }
+
+    /// Log the token about to be processed and the insertion mode, at debug
+    /// level.  The token is only escaped when debug logging is enabled.
+    fn debug_step(&self, mode: InsertionMode, token: &Token) {
+        if !log_enabled!(Level::Debug) {
+            return;
+        }
+        let escaped = to_escaped_string(token);
+        debug!("processing {} in insertion mode {:?}", escaped, mode);
+    }
+
+    /// Run `token`, and every token it gives rise to, through the tree
+    /// construction steps until nothing is left to process or the result
+    /// has to be handed back to the tokenizer.
+    ///
+    /// Returns `Continue` once the queue is drained, or `Script`,
+    /// `Plaintext` or `RawData` when a step asks for it.
+    ///
+    /// NOTE(review): `unwrap_or_return!` is defined elsewhere; presumably it
+    /// returns its second argument from this function when the option is
+    /// `None`. Confirm against the macro.
+    fn process_to_completion(&mut self, mut token: Token) -> TokenSinkResult<Handle> {
+        // Queue of additional tokens yet to be processed.
+        // This stays empty in the common case where we don't split whitespace.
+        let mut more_tokens = VecDeque::new();
+
+        loop {
+            // Remember whether this is a self-closing start tag before the
+            // token is moved into the step function.
+            let should_have_acknowledged_self_closing_flag = matches!(
+                token,
+                TagToken(Tag {
+                    self_closing: true,
+                    kind: StartTag,
+                    ..
+                })
+            );
+            let result = if self.is_foreign(&token) {
+                self.step_foreign(token)
+            } else {
+                let mode = self.mode;
+                self.step(mode, token)
+            };
+            match result {
+                Done => {
+                    // `Done` rather than `DoneAckSelfClosing`: the step did
+                    // not acknowledge the self-closing flag.
+                    if should_have_acknowledged_self_closing_flag {
+                        self.sink
+                            .parse_error(Borrowed("Unacknowledged self-closing tag"));
+                    }
+                    token = unwrap_or_return!(
+                        more_tokens.pop_front(),
+                        tokenizer::TokenSinkResult::Continue
+                    );
+                },
+                DoneAckSelfClosing => {
+                    token = unwrap_or_return!(
+                        more_tokens.pop_front(),
+                        tokenizer::TokenSinkResult::Continue
+                    );
+                },
+                // Switch insertion mode and run the returned token again.
+                Reprocess(m, t) => {
+                    self.mode = m;
+                    token = t;
+                },
+                ReprocessForeign(t) => {
+                    token = t;
+                },
+                // Split off the leading run (whitespace or not) and process
+                // it now; whatever is left is queued for later.
+                SplitWhitespace(mut buf) => {
+                    let p = buf.pop_front_char_run(is_ascii_whitespace);
+                    let (first, is_ws) = unwrap_or_return!(p, tokenizer::TokenSinkResult::Continue);
+                    let status = if is_ws { Whitespace } else { NotWhitespace };
+                    token = CharacterTokens(status, first);
+
+                    if buf.len32() > 0 {
+                        more_tokens.push_back(CharacterTokens(NotSplit, buf));
+                    }
+                },
+                // The remaining results go back to the tokenizer; no
+                // queued tokens may be outstanding at that point.
+                Script(node) => {
+                    assert!(more_tokens.is_empty());
+                    return tokenizer::TokenSinkResult::Script(node);
+                },
+                ToPlaintext => {
+                    assert!(more_tokens.is_empty());
+                    return tokenizer::TokenSinkResult::Plaintext;
+                },
+                ToRawData(k) => {
+                    assert!(more_tokens.is_empty());
+                    return tokenizer::TokenSinkResult::RawData(k);
+                },
+            }
+        }
+    }
+
+    /// Are we parsing an HTML fragment?
+    ///
+    /// True exactly when a context element is set.
+    pub fn is_fragment(&self) -> bool {
+        self.context_elem.is_some()
+    }
+
+    /// https://html.spec.whatwg.org/multipage/#appropriate-place-for-inserting-a-node
+    ///
+    /// Work out where a new node should be inserted.  The target is
+    /// `override_target` if given, otherwise the current node.
+    fn appropriate_place_for_insertion(
+        &mut self,
+        override_target: Option<Handle>,
+    ) -> InsertionPoint<Handle> {
+        use self::tag_sets::*;
+
+        declare_tag_set!(foster_target = "table" "tbody" "tfoot" "thead" "tr");
+        let target = override_target.unwrap_or_else(|| self.current_node().clone());
+        // Foster parenting only applies when it is enabled AND the target
+        // is one of the table-related elements declared above.
+        if !(self.foster_parenting && self.elem_in(&target, foster_target)) {
+            if self.html_elem_named(&target, local_name!("template")) {
+                // No foster parenting (inside template).
+                let contents = self.sink.get_template_contents(&target);
+                return LastChild(contents);
+            } else {
+                // No foster parenting (the common case).
+                return LastChild(target);
+            }
+        }
+
+        // Foster parenting
+        // Walk the open elements from the most recently added one down; the
+        // first `template` or `table` found decides the insertion point.
+        let mut iter = self.open_elems.iter().rev().peekable();
+        while let Some(elem) = iter.next() {
+            if self.html_elem_named(&elem, local_name!("template")) {
+                let contents = self.sink.get_template_contents(&elem);
+                return LastChild(contents);
+            } else if self.html_elem_named(&elem, local_name!("table")) {
+                // NOTE(review): `peek().unwrap()` panics if the `table` is
+                // the bottom-most open element; presumably a root element
+                // always sits below it. Confirm.
+                return TableFosterParenting {
+                    element: elem.clone(),
+                    prev_element: (*iter.peek().unwrap()).clone(),
+                };
+            }
+        }
+        // Neither a `template` nor a `table` is open: use the html element.
+        let html_elem = self.html_elem();
+        LastChild(html_elem.clone())
+    }
+
+    /// Hand `child` to the sink at the position described by `insertion_point`.
+    ///
+    /// Each `InsertionPoint` variant maps onto exactly one sink call: `append`,
+    /// `append_before_sibling` or `append_based_on_parent_node`.
+    fn insert_at(&mut self, insertion_point: InsertionPoint<Handle>, child: NodeOrText<Handle>) {
+        let sink = &mut self.sink;
+        match insertion_point {
+            LastChild(parent) => sink.append(&parent, child),
+            BeforeSibling(next) => sink.append_before_sibling(&next, child),
+            TableFosterParenting {
+                element: table,
+                prev_element: before_table,
+            } => sink.append_based_on_parent_node(&table, &before_table, child),
+        }
+    }
+}
+
+/// Glue between the tokenizer and the tree builder: every token emitted by the
+/// tokenizer enters the tree builder through this implementation.
+impl<Handle, Sink> TokenSink for TreeBuilder<Handle, Sink>
+where
+    Handle: Clone,
+    Sink: TreeSink<Handle = Handle>,
+{
+    type Handle = Handle;
+
+    /// Process one tokenizer token that was produced on line `line_number`.
+    ///
+    /// `ParseError` and `DoctypeToken` are handled right here and always yield
+    /// `Continue`. Every other token is converted to the tree builder's local
+    /// `Token` type and run through `process_to_completion`, whose result is
+    /// returned. Character tokens that are (or become) empty are dropped.
+    fn process_token(
+        &mut self,
+        token: tokenizer::Token,
+        line_number: u64,
+    ) -> TokenSinkResult<Handle> {
+        // Notify the sink only when the line really changes, and remember the new
+        // line so the comparison stays meaningful for the following tokens.
+        if line_number != self.current_line {
+            self.current_line = line_number;
+            self.sink.set_current_line(line_number);
+        }
+        // Read and clear the flag; it only ever applies to the very next token.
+        let ignore_lf = replace(&mut self.ignore_lf, false);
+
+        // Handle `ParseError` and `DoctypeToken`; convert everything else to the local `Token` type.
+        let token = match token {
+            tokenizer::ParseError(e) => {
+                self.sink.parse_error(e);
+                return tokenizer::TokenSinkResult::Continue;
+            },
+
+            tokenizer::DoctypeToken(dt) => {
+                if self.mode == Initial {
+                    let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc);
+                    if err {
+                        self.sink.parse_error(format_if!(
+                            self.opts.exact_errors,
+                            "Bad DOCTYPE",
+                            "Bad DOCTYPE: {:?}",
+                            dt
+                        ));
+                    }
+                    let Doctype {
+                        name,
+                        public_id,
+                        system_id,
+                        force_quirks: _,
+                    } = dt;
+                    if !self.opts.drop_doctype {
+                        // Missing parts of the doctype are passed on as empty strings.
+                        self.sink.append_doctype_to_document(
+                            name.unwrap_or_else(StrTendril::new),
+                            public_id.unwrap_or_else(StrTendril::new),
+                            system_id.unwrap_or_else(StrTendril::new),
+                        );
+                    }
+                    self.set_quirks_mode(quirk);
+
+                    self.mode = BeforeHtml;
+                    return tokenizer::TokenSinkResult::Continue;
+                } else {
+                    // A doctype outside the `Initial` mode is reported and ignored.
+                    self.sink.parse_error(format_if!(
+                        self.opts.exact_errors,
+                        "DOCTYPE in body",
+                        "DOCTYPE in insertion mode {:?}",
+                        self.mode
+                    ));
+                    return tokenizer::TokenSinkResult::Continue;
+                }
+            },
+
+            tokenizer::TagToken(x) => TagToken(x),
+            tokenizer::CommentToken(x) => CommentToken(x),
+            tokenizer::NullCharacterToken => NullCharacterToken,
+            tokenizer::EOFToken => EOFToken,
+
+            tokenizer::CharacterTokens(mut x) => {
+                // Strip a single leading newline if the previous token asked for it.
+                if ignore_lf && x.starts_with("\n") {
+                    x.pop_front(1);
+                }
+                if x.is_empty() {
+                    return tokenizer::TokenSinkResult::Continue;
+                }
+                CharacterTokens(NotSplit, x)
+            },
+        };
+
+        self.process_to_completion(token)
+    }
+
+    /// Empty the stack of open elements, reporting each element to the sink via
+    /// `pop`, starting with the most recently pushed one.
+    fn end(&mut self) {
+        for elem in self.open_elems.drain(..).rev() {
+            self.sink.pop(&elem);
+        }
+    }
+
+    /// `true` when the stack of open elements is non-empty and the adjusted
+    /// current node's namespace is not the HTML namespace.
+    fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool {
+        !self.open_elems.is_empty() &&
+            self.sink.elem_name(self.adjusted_current_node()).ns != &ns!(html)
+    }
+}
+
+/// Return the first entry of `open_elems`.
+///
+/// # Panics
+///
+/// Panics if `open_elems` is empty, because the slice is indexed at `0`.
+pub fn html_elem<Handle>(open_elems: &[Handle]) -> &Handle {
+    &open_elems[0]
+}
+
+/// Iterator over a slice of `FormatEntry` values in reverse order, keeping each
+/// entry's original index alongside it.
+pub struct ActiveFormattingIter<'a, Handle: 'a> {
+    // Reversed, index-annotated walk over the underlying entries.
+    iter: Rev<Enumerate<slice::Iter<'a, FormatEntry<Handle>>>>,
+}
+
+impl<'a, Handle> Iterator for ActiveFormattingIter<'a, Handle> {
+    type Item = (usize, &'a Handle, &'a Tag);
+
+    /// Yield the next `(index, handle, tag)` triple, or `None` once the
+    /// underlying entries are exhausted or a `Marker` entry is reached.
+    fn next(&mut self) -> Option<(usize, &'a Handle, &'a Tag)> {
+        if let Some((index, &Element(ref handle, ref tag))) = self.iter.next() {
+            Some((index, handle, tag))
+        } else {
+            None
+        }
+    }
+}
+
+/// Tells `insert_element` whether the new element is also pushed onto the
+/// stack of open elements.
+pub enum PushFlag {
+    /// Push the inserted element onto the stack of open elements.
+    Push,
+    /// Insert the element without pushing it.
+    NoPush,
+}
+
+/// Position in the list of active formatting elements that `adoption_agency`
+/// tracks while it runs, expressed relative to an existing entry.
+enum Bookmark<Handle> {
+    /// The new entry overwrites the entry holding this handle.
+    Replace(Handle),
+    /// The new entry is inserted right after the entry holding this handle.
+    InsertAfter(Handle),
+}
+
+// Build a `QualName` from literal tokens.
+//
+// * `qualname!("", local)` yields a name without prefix, using `ns!()` as namespace.
+// * `qualname!(prefix ns local)` yields a name with the given prefix and namespace.
+macro_rules! qualname {
+    ("", $local:tt) => {
+        QualName {
+            prefix: None,
+            ns: ns!(),
+            local: local_name!($local),
+        }
+    };
+    ($prefix: tt $ns:tt $local:tt) => {
+        QualName {
+            prefix: Some(namespace_prefix!($prefix)),
+            ns: ns!($ns),
+            local: local_name!($local),
+        }
+    };
+}
+
+#[doc(hidden)]
+impl<Handle, Sink> TreeBuilder<Handle, Sink>
+where
+    Handle: Clone,
+    Sink: TreeSink<Handle = Handle>,
+{
+    /// Report an "Unexpected token" parse error to the sink and return `Done`.
+    ///
+    /// The message is built by `format_if!` from `opts.exact_errors`, the escaped
+    /// form of `_thing` and the current insertion mode (presumably the detailed
+    /// variant is only used when `exact_errors` is set — confirm in `format_if!`).
+    fn unexpected<T: fmt::Debug>(&mut self, _thing: &T) -> ProcessResult<Handle> {
+        self.sink.parse_error(format_if!(
+            self.opts.exact_errors,
+            "Unexpected token",
+            "Unexpected token {} in insertion mode {:?}",
+            to_escaped_string(_thing),
+            self.mode
+        ));
+        Done
+    }
+
+    /// Panic unless `node` is an HTML-namespace element whose local name is `name`.
+    fn assert_named(&mut self, node: &Handle, name: LocalName) {
+        assert!(self.html_elem_named(&node, name));
+    }
+
+    /// Walk the list of active formatting elements backwards, yielding each
+    /// element together with its index in the list. The walk stops at the last
+    /// marker, or at the start of the list when it contains no marker.
+    fn active_formatting_end_to_marker<'a>(&'a self) -> ActiveFormattingIter<'a, Handle> {
+        let iter = self.active_formatting.iter().enumerate().rev();
+        ActiveFormattingIter { iter }
+    }
+
+    /// Index of the first entry in the list of active formatting elements whose
+    /// handle the sink considers the same node as `element`. Markers never match.
+    fn position_in_active_formatting(&self, element: &Handle) -> Option<usize> {
+        self.active_formatting.iter().position(|entry| match *entry {
+            Element(ref handle, _) => self.sink.same_node(handle, element),
+            Marker => false,
+        })
+    }
+
+    /// Store `mode` in the tree builder and forward it to the sink.
+    fn set_quirks_mode(&mut self, mode: QuirksMode) {
+        self.quirks_mode = mode;
+        self.sink.set_quirks_mode(mode);
+    }
+
+    /// Currently performs no work of its own and simply returns `Done`.
+    fn stop_parsing(&mut self) -> ProcessResult<Handle> {
+        Done
+    }
+
+    //§ parsing-elements-that-contain-only-text
+    // Switch to `Text` insertion mode, save the old mode, and
+    // switch the tokenizer to a raw-data state.
+    // The latter only takes effect after the current / next
+    // `process_token` of a start tag returns!
+    //
+    // The previous mode is kept in `orig_mode`; the tokenizer switch is requested
+    // by returning `ToRawData(k)` to the caller.
+    fn to_raw_text_mode(&mut self, k: RawKind) -> ProcessResult<Handle> {
+        self.orig_mode = Some(self.mode);
+        self.mode = Text;
+        ToRawData(k)
+    }
+
+    // The generic raw text / RCDATA parsing algorithm.
+    // Inserts an element for `tag`, then switches to raw text mode of kind `k`.
+    fn parse_raw_data(&mut self, tag: Tag, k: RawKind) -> ProcessResult<Handle> {
+        self.insert_element_for(tag);
+        self.to_raw_text_mode(k)
+    }
+    //§ END
+
+    /// The last entry on the stack of open elements.
+    ///
+    /// Panics with "no current element" if the stack is empty.
+    fn current_node(&self) -> &Handle {
+        self.open_elems.last().expect("no current element")
+    }
+
+    /// The context element when one is set and the stack of open elements holds
+    /// exactly one entry; the current node in every other case.
+    fn adjusted_current_node(&self) -> &Handle {
+        match self.context_elem.as_ref() {
+            Some(ctx) if self.open_elems.len() == 1 => ctx,
+            _ => self.current_node(),
+        }
+    }
+
+    /// Apply the tag-set predicate `set` to the expanded name of the current node.
+    fn current_node_in<TagSet>(&self, set: TagSet) -> bool
+    where
+        TagSet: Fn(ExpandedName) -> bool,
+    {
+        let name = self.sink.elem_name(self.current_node());
+        set(name)
+    }
+
+    // Insert at the "appropriate place for inserting a node".
+    // The place is computed from `override_target` first (a separate statement,
+    // since both steps need `&mut self`), then `child` is inserted there.
+    fn insert_appropriately(&mut self, child: NodeOrText<Handle>, override_target: Option<Handle>) {
+        let insertion_point = self.appropriate_place_for_insertion(override_target);
+        self.insert_at(insertion_point, child);
+    }
+
+    /// Run the adoption agency procedure for the tag name `subject`.
+    ///
+    /// The numbered comments presumably follow the step numbers of the HTML
+    /// specification's "adoption agency algorithm" — confirm against the spec
+    /// revision in use. In short: up to eight times, find the most recent active
+    /// formatting element named `subject`, locate the first "special" element
+    /// after it on the stack of open elements (the furthest block), and rebuild
+    /// the formatting elements in between so that the furthest block's children
+    /// end up wrapped in a fresh copy of the formatting element.
+    fn adoption_agency(&mut self, subject: LocalName) {
+        // 1.
+        // Shortcut: the current node has the right name and is not an active
+        // formatting element, so it is simply popped.
+        if self.current_node_named(subject.clone()) {
+            if self
+                .position_in_active_formatting(self.current_node())
+                .is_none()
+            {
+                self.pop();
+                return;
+            }
+        }
+
+        // 2. 3. 4.
+        for _ in 0..8 {
+            // 5.
+            // No matching formatting element: fall back to the generic end tag
+            // handling and return.
+            let (fmt_elem_index, fmt_elem, fmt_elem_tag) = unwrap_or_return!(
+                // We clone the Handle and Tag so they don't cause an immutable borrow of self.
+                self.active_formatting_end_to_marker()
+                    .filter(|&(_, _, tag)| tag.name == subject)
+                    .next()
+                    .map(|(i, h, t)| (i, h.clone(), t.clone())),
+                {
+                    self.process_end_tag_in_body(Tag {
+                        kind: EndTag,
+                        name: subject,
+                        self_closing: false,
+                        attrs: vec![],
+                    });
+                }
+            );
+
+            // The formatting element must still be on the stack of open elements;
+            // otherwise report an error, drop its list entry and return.
+            let fmt_elem_stack_index = unwrap_or_return!(
+                self.open_elems
+                    .iter()
+                    .rposition(|n| self.sink.same_node(n, &fmt_elem)),
+                {
+                    self.sink
+                        .parse_error(Borrowed("Formatting element not open"));
+                    self.active_formatting.remove(fmt_elem_index);
+                }
+            );
+
+            // 7.
+            if !self.in_scope(default_scope, |n| self.sink.same_node(&n, &fmt_elem)) {
+                self.sink
+                    .parse_error(Borrowed("Formatting element not in scope"));
+                return;
+            }
+
+            // 8.
+            // Only an error report; processing continues.
+            if !self.sink.same_node(self.current_node(), &fmt_elem) {
+                self.sink
+                    .parse_error(Borrowed("Formatting element not current node"));
+            }
+
+            // 9.
+            // First element at or after the formatting element's stack position
+            // that belongs to the `special_tag` set.
+            let (furthest_block_index, furthest_block) = unwrap_or_return!(
+                self.open_elems
+                    .iter()
+                    .enumerate()
+                    .skip(fmt_elem_stack_index)
+                    .filter(|&(_, open_element)| self.elem_in(open_element, special_tag))
+                    .next()
+                    .map(|(i, h)| (i, h.clone())),
+                // 10.
+                // No furthest block: cut the stack back to just before the
+                // formatting element, drop its list entry and return.
+                {
+                    self.open_elems.truncate(fmt_elem_stack_index);
+                    self.active_formatting.remove(fmt_elem_index);
+                }
+            );
+
+            // 11.
+            let common_ancestor = self.open_elems[fmt_elem_stack_index - 1].clone();
+
+            // 12.
+            let mut bookmark = Bookmark::Replace(fmt_elem.clone());
+
+            // 13.
+            let mut node;
+            let mut node_index = furthest_block_index;
+            let mut last_node = furthest_block.clone();
+
+            // 13.1.
+            let mut inner_counter = 0;
+            loop {
+                // 13.2.
+                inner_counter += 1;
+
+                // 13.3.
+                // Step one entry towards the start of the stack of open elements.
+                node_index -= 1;
+                node = self.open_elems[node_index].clone();
+
+                // 13.4.
+                if self.sink.same_node(&node, &fmt_elem) {
+                    break;
+                }
+
+                // 13.5.
+                // After three iterations, nodes are dropped from both the list of
+                // active formatting elements (if present) and the stack.
+                if inner_counter > 3 {
+                    self.position_in_active_formatting(&node)
+                        .map(|position| self.active_formatting.remove(position));
+                    self.open_elems.remove(node_index);
+                    continue;
+                }
+
+                let node_formatting_index = unwrap_or_else!(
+                    self.position_in_active_formatting(&node),
+                    // 13.6.
+                    {
+                        self.open_elems.remove(node_index);
+                        continue;
+                    }
+                );
+
+                // 13.7.
+                // Replace `node` by a newly created element with the same tag, both
+                // on the stack and in the list of active formatting elements.
+                let tag = match self.active_formatting[node_formatting_index] {
+                    Element(ref h, ref t) => {
+                        assert!(self.sink.same_node(h, &node));
+                        t.clone()
+                    },
+                    Marker => panic!("Found marker during adoption agency"),
+                };
+                // FIXME: Is there a way to avoid cloning the attributes twice here (once on their
+                // own, once as part of t.clone() above)?
+                let new_element = create_element(
+                    &mut self.sink,
+                    QualName::new(None, ns!(html), tag.name.clone()),
+                    tag.attrs.clone(),
+                );
+                self.open_elems[node_index] = new_element.clone();
+                self.active_formatting[node_formatting_index] = Element(new_element.clone(), tag);
+                node = new_element;
+
+                // 13.8.
+                if self.sink.same_node(&last_node, &furthest_block) {
+                    bookmark = Bookmark::InsertAfter(node.clone());
+                }
+
+                // 13.9.
+                // Move `last_node` under the replacement element.
+                self.sink.remove_from_parent(&last_node);
+                self.sink.append(&node, AppendNode(last_node.clone()));
+
+                // 13.10.
+                last_node = node.clone();
+
+                // 13.11.
+            }
+
+            // 14.
+            self.sink.remove_from_parent(&last_node);
+            self.insert_appropriately(AppendNode(last_node.clone()), Some(common_ancestor));
+
+            // 15.
+            // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own,
+            // once as part of t.clone() above)?
+            let new_element = create_element(
+                &mut self.sink,
+                QualName::new(None, ns!(html), fmt_elem_tag.name.clone()),
+                fmt_elem_tag.attrs.clone(),
+            );
+            let new_entry = Element(new_element.clone(), fmt_elem_tag);
+
+            // 16.
+            self.sink.reparent_children(&furthest_block, &new_element);
+
+            // 17.
+            self.sink
+                .append(&furthest_block, AppendNode(new_element.clone()));
+
+            // 18.
+            // FIXME: We could probably get rid of the position_in_active_formatting() calls here
+            // if we had a more clever Bookmark representation.
+            match bookmark {
+                Bookmark::Replace(to_replace) => {
+                    let index = self
+                        .position_in_active_formatting(&to_replace)
+                        .expect("bookmark not found in active formatting elements");
+                    self.active_formatting[index] = new_entry;
+                },
+                Bookmark::InsertAfter(previous) => {
+                    let index = self
+                        .position_in_active_formatting(&previous)
+                        .expect("bookmark not found in active formatting elements") +
+                        1;
+                    self.active_formatting.insert(index, new_entry);
+                    // The old entry is looked up again only after the insertion,
+                    // so its index already accounts for the new entry.
+                    let old_index = self
+                        .position_in_active_formatting(&fmt_elem)
+                        .expect("formatting element not found in active formatting elements");
+                    self.active_formatting.remove(old_index);
+                },
+            }
+
+            // 19.
+            // Swap the formatting element on the stack for the new element, which
+            // is placed directly after the furthest block.
+            self.remove_from_stack(&fmt_elem);
+            let new_furthest_block_index = self
+                .open_elems
+                .iter()
+                .position(|n| self.sink.same_node(n, &furthest_block))
+                .expect("furthest block missing from open element stack");
+            self.open_elems
+                .insert(new_furthest_block_index + 1, new_element);
+
+            // 20.
+        }
+    }
+
+    /// Push a clone of `elem` onto the stack of open elements.
+    fn push(&mut self, elem: &Handle) {
+        self.open_elems.push(elem.clone());
+    }
+
+    /// Pop the current node off the stack of open elements, report it to the
+    /// sink via `pop`, and return it.
+    ///
+    /// Panics with "no current element" if the stack is empty.
+    fn pop(&mut self) -> Handle {
+        let elem = self.open_elems.pop().expect("no current element");
+        self.sink.pop(&elem);
+        elem
+    }
+
+    /// Remove the last occurrence of `elem` from the stack of open elements and
+    /// report it to the sink via `pop`. Does nothing when `elem` is not on the stack.
+    fn remove_from_stack(&mut self, elem: &Handle) {
+        let sink = &mut self.sink;
+        let found = self
+            .open_elems
+            .iter()
+            .rposition(|candidate| sink.same_node(elem, candidate));
+        if let Some(index) = found {
+            self.open_elems.remove(index);
+            sink.pop(elem);
+        }
+    }
+
+    /// `true` for a marker entry, and for an element entry whose node is
+    /// currently on the stack of open elements.
+    fn is_marker_or_open(&self, entry: &FormatEntry<Handle>) -> bool {
+        if let Element(ref node, _) = *entry {
+            self.open_elems
+                .iter()
+                .rev()
+                .any(|open| self.sink.same_node(open, node))
+        } else {
+            true
+        }
+    }
+
+    /// Reconstruct the active formatting elements.
+    ///
+    /// Every entry after the last marker-or-open entry of the list is re-created:
+    /// a new element with the same tag is inserted (and pushed), and the entry is
+    /// updated to hold the new element's handle.
+    fn reconstruct_formatting(&mut self) {
+        {
+            // Nothing to do for an empty list, or when the last entry is a marker
+            // or still on the stack of open elements.
+            let last = unwrap_or_return!(self.active_formatting.last(), ());
+            if self.is_marker_or_open(last) {
+                return;
+            }
+        }
+
+        // Rewind: move towards the start of the list until a marker-or-open entry
+        // is found (then step forward past it) or index 0 is reached.
+        let mut entry_index = self.active_formatting.len() - 1;
+        loop {
+            if entry_index == 0 {
+                break;
+            }
+            entry_index -= 1;
+            if self.is_marker_or_open(&self.active_formatting[entry_index]) {
+                entry_index += 1;
+                break;
+            }
+        }
+
+        // Advance: re-create every entry from `entry_index` through the last one.
+        loop {
+            let tag = match self.active_formatting[entry_index] {
+                Element(_, ref t) => t.clone(),
+                Marker => panic!("Found marker during formatting element reconstruction"),
+            };
+
+            // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own,
+            // once as part of t.clone() above)?
+            let new_element =
+                self.insert_element(Push, ns!(html), tag.name.clone(), tag.attrs.clone());
+            self.active_formatting[entry_index] = Element(new_element, tag);
+            if entry_index == self.active_formatting.len() - 1 {
+                break;
+            }
+            entry_index += 1;
+        }
+    }
+
+    /// Get the first element on the stack, which will be the <html> element.
+    ///
+    /// Panics if the stack of open elements is empty (it is indexed at `0`).
+    fn html_elem(&self) -> &Handle {
+        &self.open_elems[0]
+    }
+
+    /// Return the second entry of the stack of open elements when it exists and
+    /// is an HTML `body` element; `None` otherwise.
+    fn body_elem(&self) -> Option<&Handle> {
+        match self.open_elems.get(1) {
+            Some(node) if self.html_elem_named(node, local_name!("body")) => Some(node),
+            _ => None,
+        }
+    }
+
+    /// Signal an error depending on the state of the stack of open elements at
+    /// the end of the body.
+    fn check_body_end(&mut self) {
+        declare_tag_set!(body_end_ok =
+            "dd" "dt" "li" "optgroup" "option" "p" "rp" "rt" "tbody" "td" "tfoot" "th"
+            "thead" "tr" "body" "html");
+
+        for elem in self.open_elems.iter() {
+            let error;
+            {
+                let name = self.sink.elem_name(elem);
+                if body_end_ok(name) {
+                    continue;
+                }
+                error = format_if!(
+                    self.opts.exact_errors,
+                    "Unexpected open tag at end of body",
+                    "Unexpected open tag {:?} at end of body",
+                    name
+                );
+            }
+            self.sink.parse_error(error);
+            // FIXME: Do we keep checking after finding one bad tag?
+            // The spec suggests not.
+            return;
+        }
+    }
+
+    /// Search the stack of open elements from its end: return `true` at the
+    /// first element accepted by `pred`, or `false` at the first element whose
+    /// name is in the `scope` tag set. `pred` is checked before `scope` for
+    /// every element.
+    fn in_scope<TagSet, Pred>(&self, scope: TagSet, pred: Pred) -> bool
+    where
+        TagSet: Fn(ExpandedName) -> bool,
+        Pred: Fn(Handle) -> bool,
+    {
+        self.open_elems
+            .iter()
+            .rev()
+            .find_map(|node| {
+                if pred(node.clone()) {
+                    Some(true)
+                } else if scope(self.sink.elem_name(node)) {
+                    Some(false)
+                } else {
+                    None
+                }
+            })
+            // supposed to be impossible, because <html> is always in scope
+            .unwrap_or(false)
+    }
+
+    /// Apply the tag-set predicate `set` to the expanded name of `elem`.
+    fn elem_in<TagSet>(&self, elem: &Handle, set: TagSet) -> bool
+    where
+        TagSet: Fn(ExpandedName) -> bool,
+    {
+        let name = self.sink.elem_name(elem);
+        set(name)
+    }
+
+    /// `true` when `elem` is in the HTML namespace and has the local name `name`.
+    fn html_elem_named(&self, elem: &Handle, name: LocalName) -> bool {
+        let ExpandedName { ns, local } = self.sink.elem_name(elem);
+        *ns == ns!(html) && *local == name
+    }
+
+    /// `true` when any element on the stack of open elements is an HTML element
+    /// with the local name `name`.
+    fn in_html_elem_named(&self, name: LocalName) -> bool {
+        self.open_elems
+            .iter()
+            .any(|elem| self.html_elem_named(elem, name.clone()))
+    }
+
+    /// `true` when the current node is an HTML element with the local name `name`.
+    fn current_node_named(&self, name: LocalName) -> bool {
+        self.html_elem_named(self.current_node(), name)
+    }
+
+    /// Like `in_scope`, with the predicate "is an HTML element named `name`".
+    fn in_scope_named<TagSet>(&self, scope: TagSet, name: LocalName) -> bool
+    where
+        TagSet: Fn(ExpandedName) -> bool,
+    {
+        self.in_scope(scope, |elem| self.html_elem_named(&elem, name.clone()))
+    }
+
+    //§ closing-elements-that-have-implied-end-tags
+    /// Pop elements (via `pop`, so the sink is notified) for as long as the
+    /// current node's name is in `set`. Stops as well when the stack is empty.
+    fn generate_implied_end<TagSet>(&mut self, set: TagSet)
+    where
+        TagSet: Fn(ExpandedName) -> bool,
+    {
+        while let Some(top) = self.open_elems.last() {
+            if !set(self.sink.elem_name(top)) {
+                return;
+            }
+            self.pop();
+        }
+    }
+
+    /// `generate_implied_end` with the `cursory_implied_end` tag set, minus the
+    /// HTML element named `except`.
+    fn generate_implied_end_except(&mut self, except: LocalName) {
+        self.generate_implied_end(|p| {
+            let is_excepted = *p.ns == ns!(html) && *p.local == except;
+            !is_excepted && cursory_implied_end(p)
+        });
+    }
+    //§ END
+
+    // Pop elements until the current element is in the set.
+    // Entries are taken straight off `open_elems`; `sink.pop` is not called here.
+    fn pop_until_current<TagSet>(&mut self, pred: TagSet)
+    where
+        TagSet: Fn(ExpandedName) -> bool,
+    {
+        while !self.current_node_in(|name| pred(name)) {
+            self.open_elems.pop();
+        }
+    }
+
+    // Pop elements until an element from the set has been popped.  Returns the
+    // number of elements popped.
+    // The count is incremented before each pop attempt, so an attempt on an
+    // empty stack is counted too.
+    fn pop_until<P>(&mut self, pred: P) -> usize
+    where
+        P: Fn(ExpandedName) -> bool,
+    {
+        let mut popped = 0;
+        loop {
+            popped += 1;
+            let finished = match self.open_elems.pop() {
+                Some(elem) => pred(self.sink.elem_name(&elem)),
+                None => true,
+            };
+            if finished {
+                return popped;
+            }
+        }
+    }
+
+    /// `pop_until` for the HTML element named `name`; returns the pop count.
+    fn pop_until_named(&mut self, name: LocalName) -> usize {
+        self.pop_until(|p| *p.ns == ns!(html) && *p.local == name)
+    }
+
+    // Pop elements until one with the specified name has been popped.
+    // Signal an error if it was not the first one.
+    // "First one" means `pop_until_named` returned exactly 1.
+    fn expect_to_close(&mut self, name: LocalName) {
+        if self.pop_until_named(name.clone()) != 1 {
+            self.sink.parse_error(format_if!(
+                self.opts.exact_errors,
+                "Unexpected open element",
+                "Unexpected open element while closing {:?}",
+                name
+            ));
+        }
+    }
+
+    /// Close a `<p>` element: generate implied end tags for everything in
+    /// `cursory_implied_end` except `p`, then pop up to and including a `p`,
+    /// reporting an error if it was not the current node.
+    fn close_p_element(&mut self) {
+        declare_tag_set!(implied = [cursory_implied_end] - "p");
+        self.generate_implied_end(implied);
+        self.expect_to_close(local_name!("p"));
+    }
+
+    /// Call `close_p_element` if a `p` element is in button scope; otherwise do nothing.
+    fn close_p_element_in_button_scope(&mut self) {
+        if self.in_scope_named(button_scope, local_name!("p")) {
+            self.close_p_element();
+        }
+    }
+
+    // Check <input> tags for type=hidden
+    // Looks at the first attribute named `type` (no namespace) and compares its
+    // value to "hidden", ignoring ASCII case. No such attribute means `false`.
+    fn is_type_hidden(&self, tag: &Tag) -> bool {
+        tag.attrs
+            .iter()
+            .find(|attr| attr.name.expanded() == expanded_name!("", "type"))
+            .map_or(false, |attr| (&*attr.value).eq_ignore_ascii_case("hidden"))
+    }
+
+    /// Process `token` with the `InBody` rules while the `foster_parenting` flag
+    /// is set; the flag is cleared again afterwards and the step result returned.
+    ///
+    /// Note that a warning is logged on every call.
+    fn foster_parent_in_body(&mut self, token: Token) -> ProcessResult<Handle> {
+        warn!("foster parenting not implemented");
+        self.foster_parenting = true;
+        let res = self.step(InBody, token);
+        // FIXME: what if res is Reprocess?
+        self.foster_parenting = false;
+        res
+    }
+
+    /// Handle a character token seen in a table context.
+    ///
+    /// When the current node is one of `table`/`tbody`/`tfoot`/`thead`/`tr`, the
+    /// current mode is saved in `orig_mode` and the token is reprocessed in
+    /// `InTableText`. Otherwise a parse error is reported and the token is
+    /// handled through `foster_parent_in_body`.
+    fn process_chars_in_table(&mut self, token: Token) -> ProcessResult<Handle> {
+        declare_tag_set!(table_outer = "table" "tbody" "tfoot" "thead" "tr");
+        if !self.current_node_in(table_outer) {
+            self.sink.parse_error(format_if!(
+                self.opts.exact_errors,
+                "Unexpected characters in table",
+                "Unexpected characters {} in table",
+                to_escaped_string(&token)
+            ));
+            return self.foster_parent_in_body(token);
+        }
+
+        assert!(self.pending_table_text.is_empty());
+        self.orig_mode = Some(self.mode);
+        Reprocess(InTableText, token)
+    }
+
+    // https://html.spec.whatwg.org/multipage/#reset-the-insertion-mode-appropriately
+    //
+    // Walks the stack of open elements from its end and returns the insertion
+    // mode implied by the first HTML element with a recognised name. Falls back
+    // to `InBody` when no element decides.
+    fn reset_insertion_mode(&mut self) -> InsertionMode {
+        for (i, mut node) in self.open_elems.iter().enumerate().rev() {
+            // `last` marks the first entry of the stack (index 0); for that entry
+            // the context element, if any, is examined instead.
+            let last = i == 0usize;
+            if let (true, Some(ctx)) = (last, self.context_elem.as_ref()) {
+                node = ctx;
+            }
+            // Only elements in the HTML namespace are considered.
+            let name = match self.sink.elem_name(node) {
+                ExpandedName {
+                    ns: &ns!(html),
+                    local,
+                } => local,
+                _ => continue,
+            };
+            match *name {
+                local_name!("select") => {
+                    // Look at the entries before this one, nearest first: a
+                    // template decides for `InSelect`, a table for `InSelectInTable`.
+                    for ancestor in self.open_elems[0..i].iter().rev() {
+                        if self.html_elem_named(ancestor, local_name!("template")) {
+                            return InSelect;
+                        } else if self.html_elem_named(ancestor, local_name!("table")) {
+                            return InSelectInTable;
+                        }
+                    }
+                    return InSelect;
+                },
+                local_name!("td") | local_name!("th") => {
+                    if !last {
+                        return InCell;
+                    }
+                },
+                local_name!("tr") => return InRow,
+                local_name!("tbody") | local_name!("thead") | local_name!("tfoot") => {
+                    return InTableBody;
+                },
+                local_name!("caption") => return InCaption,
+                local_name!("colgroup") => return InColumnGroup,
+                local_name!("table") => return InTable,
+                // Panics if `template_modes` is empty.
+                local_name!("template") => return *self.template_modes.last().unwrap(),
+                local_name!("head") => {
+                    if !last {
+                        return InHead;
+                    }
+                },
+                local_name!("body") => return InBody,
+                local_name!("frameset") => return InFrameset,
+                local_name!("html") => match self.head_elem {
+                    None => return BeforeHead,
+                    Some(_) => return AfterHead,
+                },
+
+                _ => (),
+            }
+        }
+        InBody
+    }
+
+    /// Close the current table cell: generate implied end tags, pop up to and
+    /// including a `td_th` element (reporting an error unless exactly one element
+    /// was popped), then clear the active formatting list back to the last marker.
+    fn close_the_cell(&mut self) {
+        self.generate_implied_end(cursory_implied_end);
+        if self.pop_until(td_th) != 1 {
+            self.sink
+                .parse_error(Borrowed("expected to close <td> or <th> with cell"));
+        }
+        self.clear_active_formatting_to_marker();
+    }
+
+    /// Insert `text` at the appropriate place for inserting a node; returns `Done`.
+    fn append_text(&mut self, text: StrTendril) -> ProcessResult<Handle> {
+        self.insert_appropriately(AppendText(text), None);
+        Done
+    }
+
+    /// Create a comment node from `text` and insert it at the appropriate place
+    /// for inserting a node; returns `Done`.
+    fn append_comment(&mut self, text: StrTendril) -> ProcessResult<Handle> {
+        let comment = self.sink.create_comment(text);
+        self.insert_appropriately(AppendNode(comment), None);
+        Done
+    }
+
+    /// Create a comment node from `text` and append it to `doc_handle`; returns `Done`.
+    fn append_comment_to_doc(&mut self, text: StrTendril) -> ProcessResult<Handle> {
+        let comment = self.sink.create_comment(text);
+        self.sink.append(&self.doc_handle, AppendNode(comment));
+        Done
+    }
+
+    /// Create a comment node from `text` and append it to the first element on
+    /// the stack of open elements; returns `Done`.
+    ///
+    /// Panics if the stack of open elements is empty.
+    fn append_comment_to_html(&mut self, text: StrTendril) -> ProcessResult<Handle> {
+        // The free function is used so that only `open_elems` is borrowed.
+        let target = html_elem(&self.open_elems);
+        let comment = self.sink.create_comment(text);
+        self.sink.append(target, AppendNode(comment));
+        Done
+    }
+
+    //§ creating-and-inserting-nodes
+    /// Create the root `html` element with `attrs`, push it onto the stack of
+    /// open elements and append it to `doc_handle`.
+    fn create_root(&mut self, attrs: Vec<Attribute>) {
+        let elem = create_element(
+            &mut self.sink,
+            QualName::new(None, ns!(html), local_name!("html")),
+            attrs,
+        );
+        self.push(&elem);
+        self.sink.append(&self.doc_handle, AppendNode(elem));
+        // FIXME: application cache selection algorithm
+    }
+
+    // https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token
+    //
+    // Creates an element named `name` in namespace `ns` with `attrs`, associates
+    // it with the current form element where applicable, inserts it at the
+    // appropriate place, and pushes it onto the stack of open elements when
+    // `push` is `Push`. Returns the new element's handle.
+    fn insert_element(
+        &mut self,
+        push: PushFlag,
+        ns: Namespace,
+        name: LocalName,
+        attrs: Vec<Attribute>,
+    ) -> Handle {
+        declare_tag_set!(form_associatable =
+            "button" "fieldset" "input" "object"
+            "output" "select" "textarea" "img");
+
+        declare_tag_set!(listed = [form_associatable] - "img");
+
+        // Step 7.
+        let qname = QualName::new(None, ns, name);
+        let elem = create_element(&mut self.sink, qname.clone(), attrs.clone());
+
+        // The nodes passed along with the form association are taken from the
+        // insertion point before it is consumed by `insert_at` below.
+        let insertion_point = self.appropriate_place_for_insertion(None);
+        let (node1, node2) = match insertion_point {
+            LastChild(ref p) | BeforeSibling(ref p) => (p.clone(), None),
+            TableFosterParenting {
+                ref element,
+                ref prev_element,
+            } => (element.clone(), Some(prev_element.clone())),
+        };
+
+        // Step 12.
+        let wants_form_owner = form_associatable(qname.expanded()) &&
+            self.form_elem.is_some() &&
+            !self.in_html_elem_named(local_name!("template")) &&
+            !(listed(qname.expanded()) &&
+                attrs
+                    .iter()
+                    .any(|attr| attr.name.expanded() == expanded_name!("", "form")));
+        if wants_form_owner {
+            let form = self.form_elem.as_ref().unwrap().clone();
+            self.sink
+                .associate_with_form(&elem, &form, (&node1, node2.as_ref()));
+        }
+
+        self.insert_at(insertion_point, AppendNode(elem.clone()));
+
+        if let Push = push {
+            self.push(&elem);
+        }
+        // FIXME: Remove from the stack if we can't append?
+        elem
+    }
+
+    /// Inserts an HTML-namespace element for `tag` with the `Push` flag and
+    /// returns its handle.
+    fn insert_element_for(&mut self, tag: Tag) -> Handle {
+        let Tag { name, attrs, .. } = tag;
+        self.insert_element(Push, ns!(html), name, attrs)
+    }
+
+    /// Inserts an HTML-namespace element for `tag` with the `NoPush` flag and
+    /// returns its handle.
+    fn insert_and_pop_element_for(&mut self, tag: Tag) -> Handle {
+        let Tag { name, attrs, .. } = tag;
+        self.insert_element(NoPush, ns!(html), name, attrs)
+    }
+
+    /// Inserts an attribute-less HTML-namespace element called `name` with the
+    /// `Push` flag, for elements that have no tag token of their own.
+    fn insert_phantom(&mut self, name: LocalName) -> Handle {
+        let no_attrs = Vec::new();
+        self.insert_element(Push, ns!(html), name, no_attrs)
+    }
+    //§ END
+
+    /// Inserts an element for `tag` and records it in `active_formatting`.
+    ///
+    /// Before inserting, the entries yielded by
+    /// `active_formatting_end_to_marker` are scanned for tags equivalent to
+    /// `tag` (ignoring attribute order). If three or more are found, the one
+    /// yielded last by that scan is removed from the list.
+    fn create_formatting_element_for(&mut self, tag: Tag) -> Handle {
+        // FIXME: This really wants unit tests.
+        // Count the equivalent entries and keep the index of the last one seen.
+        let (matches, last_seen) = self
+            .active_formatting_end_to_marker()
+            .filter(|&(_, _, old_tag)| tag.equiv_modulo_attr_order(old_tag))
+            .fold((0usize, None), |(count, _), (i, _, _)| (count + 1, Some(i)));
+
+        if matches >= 3 {
+            let stale = last_seen.expect("matches with no index");
+            self.active_formatting.remove(stale);
+        }
+
+        let handle = self.insert_element(Push, ns!(html), tag.name.clone(), tag.attrs.clone());
+        let entry = Element(handle.clone(), tag);
+        self.active_formatting.push(entry);
+        handle
+    }
+
+    /// Pops entries off `active_formatting` until a `Marker` has been popped
+    /// or the list is empty.
+    fn clear_active_formatting_to_marker(&mut self) {
+        while let Some(entry) = self.active_formatting.pop() {
+            if let Marker = entry {
+                break;
+            }
+        }
+    }
+
+    /// Closes the nearest open HTML element whose name equals `tag.name`.
+    ///
+    /// `open_elems` is walked from the end. Meeting an element in
+    /// `special_tag` before a match reports a parse error and leaves the stack
+    /// untouched. Otherwise implied end tags (except `tag.name`) are
+    /// generated, a mis-nesting is reported if the match is not the last open
+    /// element, and `open_elems` is truncated to the matched index, which
+    /// drops the matched element and everything after it.
+    fn process_end_tag_in_body(&mut self, tag: Tag) {
+        // Look back for a matching open element.
+        let mut found = None;
+        for (idx, node) in self.open_elems.iter().enumerate().rev() {
+            if self.html_elem_named(node, tag.name.clone()) {
+                found = Some(idx);
+                break;
+            }
+
+            if self.elem_in(node, special_tag) {
+                self.sink
+                    .parse_error(Borrowed("Found special tag while closing generic tag"));
+                return;
+            }
+        }
+
+        // Can't use unwrap_or_return!() due to rust-lang/rust#16617.
+        let match_idx = match found {
+            Some(idx) => idx,
+            None => {
+                // I believe this is impossible, because the root
+                // <html> element is in special_tag.
+                self.unexpected(&tag);
+                return;
+            },
+        };
+
+        self.generate_implied_end_except(tag.name.clone());
+
+        let last_idx = self.open_elems.len() - 1;
+        if match_idx != last_idx {
+            // mis-nested tags
+            self.unexpected(&tag);
+        }
+        self.open_elems.truncate(match_idx);
+    }
+
+    /// Looks for an HTML `a` element among the entries yielded by
+    /// `active_formatting_end_to_marker`. If there is one, `tag` is reported
+    /// as unexpected, the adoption agency is run for `a`, and that element is
+    /// removed from `active_formatting` (if still present) and passed to
+    /// `remove_from_stack`. Without such an entry nothing happens.
+    fn handle_misnested_a_tags(&mut self, tag: &Tag) {
+        let open_anchor = self
+            .active_formatting_end_to_marker()
+            .find(|&(_, n, _)| self.html_elem_named(n, local_name!("a")))
+            .map(|(_, n, _)| n.clone());
+        let node = match open_anchor {
+            Some(handle) => handle,
+            None => return,
+        };
+
+        self.unexpected(tag);
+        self.adoption_agency(local_name!("a"));
+        // Look the entry up again rather than reusing an index from before the
+        // adoption agency call.
+        if let Some(index) = self.position_in_active_formatting(&node) {
+            self.active_formatting.remove(index);
+        }
+        self.remove_from_stack(&node);
+    }
+
+    //§ tree-construction
+    /// Returns `true` unless one of the following holds, in which case the
+    /// result is `false`:
+    ///
+    /// * `token` is EOF or there are no open elements;
+    /// * the adjusted current node is in the HTML namespace;
+    /// * it is a MathML text integration point and `token` is character data
+    ///   or a start tag other than `mglyph` / `malignmark`;
+    /// * it is an SVG HTML integration point and `token` is character data or
+    ///   any start tag;
+    /// * it is MathML `annotation-xml` and `token` is an `svg` start tag, or
+    ///   `token` is character data / another start tag and the sink reports
+    ///   the node as an annotation-xml integration point.
+    fn is_foreign(&mut self, token: &Token) -> bool {
+        if matches!(*token, EOFToken) || self.open_elems.is_empty() {
+            return false;
+        }
+
+        let name = self.sink.elem_name(self.adjusted_current_node());
+        if *name.ns == ns!(html) {
+            return false;
+        }
+
+        // Classify the token once; every rule below only cares about these.
+        let is_text = matches!(*token, CharacterTokens(..) | NullCharacterToken);
+        let start_tag_name = match *token {
+            TagToken(Tag {
+                kind: StartTag,
+                ref name,
+                ..
+            }) => Some(name),
+            _ => None,
+        };
+
+        if mathml_text_integration_point(name) {
+            let plain_start_tag = start_tag_name.map_or(false, |local| {
+                !matches!(*local, local_name!("mglyph") | local_name!("malignmark"))
+            });
+            if is_text || plain_start_tag {
+                return false;
+            }
+        }
+
+        if svg_html_integration_point(name) && (is_text || start_tag_name.is_some()) {
+            return false;
+        }
+
+        if let expanded_name!(mathml "annotation-xml") = name {
+            if let Some(local) = start_tag_name {
+                if *local == local_name!("svg") {
+                    return false;
+                }
+            }
+            if is_text || start_tag_name.is_some() {
+                return !self
+                    .sink
+                    .is_mathml_annotation_xml_integration_point(self.adjusted_current_node());
+            }
+        }
+
+        true
+    }
+    //§ END
+
+    /// Inserts `tag` as an element in namespace `ns` after applying the
+    /// MathML or SVG attribute adjustments (depending on `ns`) and the foreign
+    /// attribute adjustments.
+    ///
+    /// A self-closing tag is inserted with `NoPush` and yields
+    /// `DoneAckSelfClosing`; any other tag is inserted with `Push` and yields
+    /// `Done`.
+    fn enter_foreign(&mut self, mut tag: Tag, ns: Namespace) -> ProcessResult<Handle> {
+        if ns == ns!(mathml) {
+            self.adjust_mathml_attributes(&mut tag);
+        } else if ns == ns!(svg) {
+            self.adjust_svg_attributes(&mut tag);
+        }
+        self.adjust_foreign_attributes(&mut tag);
+
+        let (push, outcome) = if tag.self_closing {
+            (NoPush, DoneAckSelfClosing)
+        } else {
+            (Push, Done)
+        };
+        self.insert_element(push, ns, tag.name, tag.attrs);
+        outcome
+    }
+
+    /// Replaces an all-lowercase SVG element name in `tag` with its
+    /// mixed-case spelling. Names that are not in the table are left as is.
+    fn adjust_svg_tag_name(&mut self, tag: &mut Tag) {
+        let mixed_case = match tag.name {
+            local_name!("altglyph") => local_name!("altGlyph"),
+            local_name!("altglyphdef") => local_name!("altGlyphDef"),
+            local_name!("altglyphitem") => local_name!("altGlyphItem"),
+            local_name!("animatecolor") => local_name!("animateColor"),
+            local_name!("animatemotion") => local_name!("animateMotion"),
+            local_name!("animatetransform") => local_name!("animateTransform"),
+            local_name!("clippath") => local_name!("clipPath"),
+            local_name!("feblend") => local_name!("feBlend"),
+            local_name!("fecolormatrix") => local_name!("feColorMatrix"),
+            local_name!("fecomponenttransfer") => local_name!("feComponentTransfer"),
+            local_name!("fecomposite") => local_name!("feComposite"),
+            local_name!("feconvolvematrix") => local_name!("feConvolveMatrix"),
+            local_name!("fediffuselighting") => local_name!("feDiffuseLighting"),
+            local_name!("fedisplacementmap") => local_name!("feDisplacementMap"),
+            local_name!("fedistantlight") => local_name!("feDistantLight"),
+            local_name!("fedropshadow") => local_name!("feDropShadow"),
+            local_name!("feflood") => local_name!("feFlood"),
+            local_name!("fefunca") => local_name!("feFuncA"),
+            local_name!("fefuncb") => local_name!("feFuncB"),
+            local_name!("fefuncg") => local_name!("feFuncG"),
+            local_name!("fefuncr") => local_name!("feFuncR"),
+            local_name!("fegaussianblur") => local_name!("feGaussianBlur"),
+            local_name!("feimage") => local_name!("feImage"),
+            local_name!("femerge") => local_name!("feMerge"),
+            local_name!("femergenode") => local_name!("feMergeNode"),
+            local_name!("femorphology") => local_name!("feMorphology"),
+            local_name!("feoffset") => local_name!("feOffset"),
+            local_name!("fepointlight") => local_name!("fePointLight"),
+            local_name!("fespecularlighting") => local_name!("feSpecularLighting"),
+            local_name!("fespotlight") => local_name!("feSpotLight"),
+            local_name!("fetile") => local_name!("feTile"),
+            local_name!("feturbulence") => local_name!("feTurbulence"),
+            local_name!("foreignobject") => local_name!("foreignObject"),
+            local_name!("glyphref") => local_name!("glyphRef"),
+            local_name!("lineargradient") => local_name!("linearGradient"),
+            local_name!("radialgradient") => local_name!("radialGradient"),
+            local_name!("textpath") => local_name!("textPath"),
+            // Not a name that needs fixing up.
+            _ => return,
+        };
+        tag.name = mixed_case;
+    }
+
+    /// Calls `map` with the local name of every attribute of `tag` and, where
+    /// it returns `Some`, overwrites the attribute's qualified name with the
+    /// result. Attribute values are not touched.
+    fn adjust_attributes<F>(&mut self, tag: &mut Tag, mut map: F)
+    where
+        F: FnMut(LocalName) -> Option<QualName>,
+    {
+        for attr in tag.attrs.iter_mut() {
+            let replacement = map(attr.name.local.clone());
+            if let Some(adjusted) = replacement {
+                attr.name = adjusted;
+            }
+        }
+    }
+
+    /// Renames all-lowercase SVG attribute names on `tag` to their mixed-case
+    /// spelling (no prefix, empty namespace). Attributes that are not in the
+    /// table keep their name.
+    fn adjust_svg_attributes(&mut self, tag: &mut Tag) {
+        self.adjust_attributes(tag, |lowered| {
+            let mixed_case = match lowered {
+                local_name!("attributename") => qualname!("", "attributeName"),
+                local_name!("attributetype") => qualname!("", "attributeType"),
+                local_name!("basefrequency") => qualname!("", "baseFrequency"),
+                local_name!("baseprofile") => qualname!("", "baseProfile"),
+                local_name!("calcmode") => qualname!("", "calcMode"),
+                local_name!("clippathunits") => qualname!("", "clipPathUnits"),
+                local_name!("diffuseconstant") => qualname!("", "diffuseConstant"),
+                local_name!("edgemode") => qualname!("", "edgeMode"),
+                local_name!("filterunits") => qualname!("", "filterUnits"),
+                local_name!("glyphref") => qualname!("", "glyphRef"),
+                local_name!("gradienttransform") => qualname!("", "gradientTransform"),
+                local_name!("gradientunits") => qualname!("", "gradientUnits"),
+                local_name!("kernelmatrix") => qualname!("", "kernelMatrix"),
+                local_name!("kernelunitlength") => qualname!("", "kernelUnitLength"),
+                local_name!("keypoints") => qualname!("", "keyPoints"),
+                local_name!("keysplines") => qualname!("", "keySplines"),
+                local_name!("keytimes") => qualname!("", "keyTimes"),
+                local_name!("lengthadjust") => qualname!("", "lengthAdjust"),
+                local_name!("limitingconeangle") => qualname!("", "limitingConeAngle"),
+                local_name!("markerheight") => qualname!("", "markerHeight"),
+                local_name!("markerunits") => qualname!("", "markerUnits"),
+                local_name!("markerwidth") => qualname!("", "markerWidth"),
+                local_name!("maskcontentunits") => qualname!("", "maskContentUnits"),
+                local_name!("maskunits") => qualname!("", "maskUnits"),
+                local_name!("numoctaves") => qualname!("", "numOctaves"),
+                local_name!("pathlength") => qualname!("", "pathLength"),
+                local_name!("patterncontentunits") => qualname!("", "patternContentUnits"),
+                local_name!("patterntransform") => qualname!("", "patternTransform"),
+                local_name!("patternunits") => qualname!("", "patternUnits"),
+                local_name!("pointsatx") => qualname!("", "pointsAtX"),
+                local_name!("pointsaty") => qualname!("", "pointsAtY"),
+                local_name!("pointsatz") => qualname!("", "pointsAtZ"),
+                local_name!("preservealpha") => qualname!("", "preserveAlpha"),
+                local_name!("preserveaspectratio") => qualname!("", "preserveAspectRatio"),
+                local_name!("primitiveunits") => qualname!("", "primitiveUnits"),
+                local_name!("refx") => qualname!("", "refX"),
+                local_name!("refy") => qualname!("", "refY"),
+                local_name!("repeatcount") => qualname!("", "repeatCount"),
+                local_name!("repeatdur") => qualname!("", "repeatDur"),
+                local_name!("requiredextensions") => qualname!("", "requiredExtensions"),
+                local_name!("requiredfeatures") => qualname!("", "requiredFeatures"),
+                local_name!("specularconstant") => qualname!("", "specularConstant"),
+                local_name!("specularexponent") => qualname!("", "specularExponent"),
+                local_name!("spreadmethod") => qualname!("", "spreadMethod"),
+                local_name!("startoffset") => qualname!("", "startOffset"),
+                local_name!("stddeviation") => qualname!("", "stdDeviation"),
+                local_name!("stitchtiles") => qualname!("", "stitchTiles"),
+                local_name!("surfacescale") => qualname!("", "surfaceScale"),
+                local_name!("systemlanguage") => qualname!("", "systemLanguage"),
+                local_name!("tablevalues") => qualname!("", "tableValues"),
+                local_name!("targetx") => qualname!("", "targetX"),
+                local_name!("targety") => qualname!("", "targetY"),
+                local_name!("textlength") => qualname!("", "textLength"),
+                local_name!("viewbox") => qualname!("", "viewBox"),
+                local_name!("viewtarget") => qualname!("", "viewTarget"),
+                local_name!("xchannelselector") => qualname!("", "xChannelSelector"),
+                local_name!("ychannelselector") => qualname!("", "yChannelSelector"),
+                local_name!("zoomandpan") => qualname!("", "zoomAndPan"),
+                // Leave every other attribute name alone.
+                _ => return None,
+            };
+            Some(mixed_case)
+        });
+    }
+
+    /// Renames a `definitionurl` attribute on `tag` to `definitionURL`; all
+    /// other attributes keep their name.
+    fn adjust_mathml_attributes(&mut self, tag: &mut Tag) {
+        self.adjust_attributes(tag, |lowered| {
+            if lowered == local_name!("definitionurl") {
+                Some(qualname!("", "definitionURL"))
+            } else {
+                None
+            }
+        });
+    }
+
+    /// Gives the `xlink:*`, `xml:*`, `xmlns` and `xmlns:xlink` attributes of
+    /// `tag` a qualified name with the matching prefix and namespace; other
+    /// attributes keep their name.
+    fn adjust_foreign_attributes(&mut self, tag: &mut Tag) {
+        self.adjust_attributes(tag, |raw| {
+            let qualified = match raw {
+                local_name!("xlink:actuate") => qualname!("xlink" xlink "actuate"),
+                local_name!("xlink:arcrole") => qualname!("xlink" xlink "arcrole"),
+                local_name!("xlink:href") => qualname!("xlink" xlink "href"),
+                local_name!("xlink:role") => qualname!("xlink" xlink "role"),
+                local_name!("xlink:show") => qualname!("xlink" xlink "show"),
+                local_name!("xlink:title") => qualname!("xlink" xlink "title"),
+                local_name!("xlink:type") => qualname!("xlink" xlink "type"),
+                local_name!("xml:base") => qualname!("xml" xml "base"),
+                local_name!("xml:lang") => qualname!("xml" xml "lang"),
+                local_name!("xml:space") => qualname!("xml" xml "space"),
+                local_name!("xmlns") => qualname!("" xmlns "xmlns"),
+                local_name!("xmlns:xlink") => qualname!("xmlns" xmlns "xlink"),
+                // Not one of the namespaced attributes.
+                _ => return None,
+            };
+            Some(qualified)
+        });
+    }
+
+    /// Inserts `tag` as an element in the namespace of the adjusted current
+    /// node.
+    ///
+    /// For MathML the attribute names are adjusted; for SVG both the tag name
+    /// and the attribute names are; the foreign attribute adjustments always
+    /// run. A self-closing tag is inserted with `NoPush` and yields
+    /// `DoneAckSelfClosing`; any other tag is inserted with `Push` and yields
+    /// `Done`.
+    fn foreign_start_tag(&mut self, mut tag: Tag) -> ProcessResult<Handle> {
+        let current_ns = self.sink.elem_name(self.adjusted_current_node()).ns.clone();
+        if current_ns == ns!(mathml) {
+            self.adjust_mathml_attributes(&mut tag);
+        } else if current_ns == ns!(svg) {
+            self.adjust_svg_tag_name(&mut tag);
+            self.adjust_svg_attributes(&mut tag);
+        }
+        self.adjust_foreign_attributes(&mut tag);
+
+        // FIXME(#118): <script /> in SVG
+        let (push, outcome) = if tag.self_closing {
+            (NoPush, DoneAckSelfClosing)
+        } else {
+            (Push, Done)
+        };
+        self.insert_element(push, current_ns, tag.name, tag.attrs);
+        outcome
+    }
+
+    /// Reports `tag` as unexpected and then recovers.
+    ///
+    /// When `is_fragment()` is true the tag is handled by `foreign_start_tag`.
+    /// Otherwise elements are popped (at least one) until the current node is
+    /// in the HTML namespace, a MathML text integration point or an SVG HTML
+    /// integration point, and the tag is handed back as `ReprocessForeign`.
+    fn unexpected_start_tag_in_foreign_content(&mut self, tag: Tag) -> ProcessResult<Handle> {
+        self.unexpected(&tag);
+        if self.is_fragment() {
+            return self.foreign_start_tag(tag);
+        }
+
+        loop {
+            self.pop();
+            let reached_stop = self.current_node_in(|n| {
+                *n.ns == ns!(html) ||
+                    mathml_text_integration_point(n) ||
+                    svg_html_integration_point(n)
+            });
+            if reached_stop {
+                break;
+            }
+        }
+        ReprocessForeign(TagToken(tag))
+    }
+}
diff --git a/src/tree_builder/rules.rs b/src/tree_builder/rules.rs
new file mode 100644
index 0000000..bdc8afd
--- /dev/null
+++ b/src/tree_builder/rules.rs
@@ -0,0 +1,1449 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// The tree builder rules, as a single, enormous nested match expression.
+
+use markup5ever::{expanded_name, local_name, namespace_prefix, namespace_url, ns};
+use crate::tokenizer::states::{Plaintext, Rawtext, Rcdata, ScriptData};
+use crate::tree_builder::tag_sets::*;
+use crate::tree_builder::types::*;
+
+use std::borrow::ToOwned;
+
+use crate::tendril::SliceExt;
+
+/// Returns `true` if `x` contains at least one character for which
+/// `is_ascii_whitespace` is false; an empty tendril yields `false`.
+fn any_not_whitespace(x: &StrTendril) -> bool {
+    // FIXME: this might be much faster as a byte scan
+    let only_whitespace = x.chars().all(|c| is_ascii_whitespace(c));
+    !only_whitespace
+}
+
+/// Returns the last element of `open_elems`.
+///
+/// # Panics
+///
+/// Panics with "no current element" when `open_elems` is empty.
+fn current_node<Handle>(open_elems: &[Handle]) -> &Handle {
+    let (top, _) = open_elems.split_last().expect("no current element");
+    top
+}
+
+#[doc(hidden)]
+impl<Handle, Sink> TreeBuilder<Handle, Sink>
+where
+    Handle: Clone,
+    Sink: TreeSink<Handle = Handle>,
+{
+    fn step(&mut self, mode: InsertionMode, token: Token) -> ProcessResult<Handle> {
+        self.debug_step(mode, &token);
+
+        match mode {
+            //§ the-initial-insertion-mode
+            Initial => match_token!(token {
+                CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+                CharacterTokens(Whitespace, _) => Done,
+                CommentToken(text) => self.append_comment_to_doc(text),
+                token => {
+                    if !self.opts.iframe_srcdoc {
+                        self.unexpected(&token);
+                        self.set_quirks_mode(Quirks);
+                    }
+                    Reprocess(BeforeHtml, token)
+                }
+            }),
+
+            //§ the-before-html-insertion-mode
+            BeforeHtml => match_token!(token {
+                CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+                CharacterTokens(Whitespace, _) => Done,
+                CommentToken(text) => self.append_comment_to_doc(text),
+
+                tag @ <html> => {
+                    self.create_root(tag.attrs);
+                    self.mode = BeforeHead;
+                    Done
+                }
+
+                </head> </body> </html> </br> => else,
+
+                tag @ </_> => self.unexpected(&tag),
+
+                token => {
+                    self.create_root(vec!());
+                    Reprocess(BeforeHead, token)
+                }
+            }),
+
+            //§ the-before-head-insertion-mode
+            BeforeHead => match_token!(token {
+                CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+                CharacterTokens(Whitespace, _) => Done,
+                CommentToken(text) => self.append_comment(text),
+
+                <html> => self.step(InBody, token),
+
+                tag @ <head> => {
+                    self.head_elem = Some(self.insert_element_for(tag));
+                    self.mode = InHead;
+                    Done
+                }
+
+                </head> </body> </html> </br> => else,
+
+                tag @ </_> => self.unexpected(&tag),
+
+                token => {
+                    self.head_elem = Some(self.insert_phantom(local_name!("head")));
+                    Reprocess(InHead, token)
+                }
+            }),
+
+            //§ parsing-main-inhead
+            InHead => match_token!(token {
+                CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+                CharacterTokens(Whitespace, text) => self.append_text(text),
+                CommentToken(text) => self.append_comment(text),
+
+                <html> => self.step(InBody, token),
+
+                tag @ <base> <basefont> <bgsound> <link> <meta> => {
+                    // FIXME: handle <meta charset=...> and <meta http-equiv="Content-Type">
+                    self.insert_and_pop_element_for(tag);
+                    DoneAckSelfClosing
+                }
+
+                tag @ <title> => {
+                    self.parse_raw_data(tag, Rcdata)
+                }
+
+                tag @ <noframes> <style> <noscript> => {
+                    if (!self.opts.scripting_enabled) && (tag.name == local_name!("noscript")) {
+                        self.insert_element_for(tag);
+                        self.mode = InHeadNoscript;
+                        Done
+                    } else {
+                        self.parse_raw_data(tag, Rawtext)
+                    }
+                }
+
+                tag @ <script> => {
+                    let elem = create_element(
+                        &mut self.sink, QualName::new(None, ns!(html), local_name!("script")),
+                        tag.attrs);
+                    if self.is_fragment() {
+                        self.sink.mark_script_already_started(&elem);
+                    }
+                    self.insert_appropriately(AppendNode(elem.clone()), None);
+                    self.open_elems.push(elem);
+                    self.to_raw_text_mode(ScriptData)
+                }
+
+                </head> => {
+                    self.pop();
+                    self.mode = AfterHead;
+                    Done
+                }
+
+                </body> </html> </br> => else,
+
+                tag @ <template> => {
+                    self.insert_element_for(tag);
+                    self.active_formatting.push(Marker);
+                    self.frameset_ok = false;
+                    self.mode = InTemplate;
+                    self.template_modes.push(InTemplate);
+                    Done
+                }
+
+                tag @ </template> => {
+                    if !self.in_html_elem_named(local_name!("template")) {
+                        self.unexpected(&tag);
+                    } else {
+                        self.generate_implied_end(thorough_implied_end);
+                        self.expect_to_close(local_name!("template"));
+                        self.clear_active_formatting_to_marker();
+                        self.template_modes.pop();
+                        self.mode = self.reset_insertion_mode();
+                    }
+                    Done
+                }
+
+                <head> => self.unexpected(&token),
+                tag @ </_> => self.unexpected(&tag),
+
+                token => {
+                    self.pop();
+                    Reprocess(AfterHead, token)
+                }
+            }),
+
+            //§ parsing-main-inheadnoscript
+            InHeadNoscript => match_token!(token {
+                <html> => self.step(InBody, token),
+
+                </noscript> => {
+                    self.pop();
+                    self.mode = InHead;
+                    Done
+                },
+
+                CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+                CharacterTokens(Whitespace, _) => self.step(InHead, token),
+
+                CommentToken(_) => self.step(InHead, token),
+
+                <basefont> <bgsound> <link> <meta> <noframes> <style>
+                    => self.step(InHead, token),
+
+                </br> => else,
+
+                <head> <noscript> => self.unexpected(&token),
+                tag @ </_> => self.unexpected(&tag),
+
+                token => {
+                    self.unexpected(&token);
+                    self.pop();
+                    Reprocess(InHead, token)
+                },
+            }),
+
+            //§ the-after-head-insertion-mode
+            AfterHead => match_token!(token {
+                CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+                CharacterTokens(Whitespace, text) => self.append_text(text),
+                CommentToken(text) => self.append_comment(text),
+
+                <html> => self.step(InBody, token),
+
+                tag @ <body> => {
+                    self.insert_element_for(tag);
+                    self.frameset_ok = false;
+                    self.mode = InBody;
+                    Done
+                }
+
+                tag @ <frameset> => {
+                    self.insert_element_for(tag);
+                    self.mode = InFrameset;
+                    Done
+                }
+
+                <base> <basefont> <bgsound> <link> <meta>
+                      <noframes> <script> <style> <template> <title> => {
+                    self.unexpected(&token);
+                    let head = self.head_elem.as_ref().expect("no head element").clone();
+                    self.push(&head);
+                    let result = self.step(InHead, token);
+                    self.remove_from_stack(&head);
+                    result
+                }
+
+                </template> => self.step(InHead, token),
+
+                </body> </html> </br> => else,
+
+                <head> => self.unexpected(&token),
+                tag @ </_> => self.unexpected(&tag),
+
+                token => {
+                    self.insert_phantom(local_name!("body"));
+                    Reprocess(InBody, token)
+                }
+            }),
+
+            //§ parsing-main-inbody
+            InBody => match_token!(token {
+                NullCharacterToken => self.unexpected(&token),
+
+                CharacterTokens(_, text) => {
+                    self.reconstruct_formatting();
+                    if any_not_whitespace(&text) {
+                        self.frameset_ok = false;
+                    }
+                    self.append_text(text)
+                }
+
+                CommentToken(text) => self.append_comment(text),
+
+                tag @ <html> => {
+                    self.unexpected(&tag);
+                    if !self.in_html_elem_named(local_name!("template")) {
+                        let top = html_elem(&self.open_elems);
+                        self.sink.add_attrs_if_missing(top, tag.attrs);
+                    }
+                    Done
+                }
+
+                <base> <basefont> <bgsound> <link> <meta> <noframes>
+                  <script> <style> <template> <title> </template> => {
+                    self.step(InHead, token)
+                }
+
+                tag @ <body> => {
+                    self.unexpected(&tag);
+                    match self.body_elem().cloned() {
+                        Some(ref node) if self.open_elems.len() != 1 &&
+                                          !self.in_html_elem_named(local_name!("template")) => {
+                            self.frameset_ok = false;
+                            self.sink.add_attrs_if_missing(node, tag.attrs)
+                        },
+                        _ => {}
+                    }
+                    Done
+                }
+
+                tag @ <frameset> => {
+                    self.unexpected(&tag);
+                    if !self.frameset_ok { return Done; }
+
+                    let body = unwrap_or_return!(self.body_elem(), Done).clone();
+                    self.sink.remove_from_parent(&body);
+
+                    // FIXME: can we get here in the fragment case?
+                    // What to do with the first element then?
+                    self.open_elems.truncate(1);
+                    self.insert_element_for(tag);
+                    self.mode = InFrameset;
+                    Done
+                }
+
+                EOFToken => {
+                    if !self.template_modes.is_empty() {
+                        self.step(InTemplate, token)
+                    } else {
+                        self.check_body_end();
+                        self.stop_parsing()
+                    }
+                }
+
+                </body> => {
+                    if self.in_scope_named(default_scope, local_name!("body")) {
+                        self.check_body_end();
+                        self.mode = AfterBody;
+                    } else {
+                        self.sink.parse_error(Borrowed("</body> with no <body> in scope"));
+                    }
+                    Done
+                }
+
+                </html> => {
+                    if self.in_scope_named(default_scope, local_name!("body")) {
+                        self.check_body_end();
+                        Reprocess(AfterBody, token)
+                    } else {
+                        self.sink.parse_error(Borrowed("</html> with no <body> in scope"));
+                        Done
+                    }
+                }
+
+                tag @ <address> <article> <aside> <blockquote> <center> <details> <dialog>
+                  <dir> <div> <dl> <fieldset> <figcaption> <figure> <footer> <header>
+                  <hgroup> <main> <nav> <ol> <p> <section> <summary> <ul> => {
+                    self.close_p_element_in_button_scope();
+                    self.insert_element_for(tag);
+                    Done
+                }
+
+                tag @ <menu> => {
+                    self.close_p_element_in_button_scope();
+                    self.insert_element_for(tag);
+                    Done
+                }
+
+                tag @ <h1> <h2> <h3> <h4> <h5> <h6> => {
+                    self.close_p_element_in_button_scope();
+                    if self.current_node_in(heading_tag) {
+                        self.sink.parse_error(Borrowed("nested heading tags"));
+                        self.pop();
+                    }
+                    self.insert_element_for(tag);
+                    Done
+                }
+
+                tag @ <pre> <listing> => {
+                    self.close_p_element_in_button_scope();
+                    self.insert_element_for(tag);
+                    self.ignore_lf = true;
+                    self.frameset_ok = false;
+                    Done
+                }
+
+                tag @ <form> => {
+                    if self.form_elem.is_some() &&
+                       !self.in_html_elem_named(local_name!("template")) {
+                        self.sink.parse_error(Borrowed("nested forms"));
+                    } else {
+                        self.close_p_element_in_button_scope();
+                        let elem = self.insert_element_for(tag);
+                        if !self.in_html_elem_named(local_name!("template")) {
+                            self.form_elem = Some(elem);
+                        }
+                    }
+                    Done
+                }
+
+                tag @ <li> <dd> <dt> => {
+                    declare_tag_set!(close_list = "li");
+                    declare_tag_set!(close_defn = "dd" "dt");
+                    declare_tag_set!(extra_special = [special_tag] - "address" "div" "p");
+                    let list = match tag.name {
+                        local_name!("li") => true,
+                        local_name!("dd") | local_name!("dt") => false,
+                        _ => unreachable!(),
+                    };
+
+                    self.frameset_ok = false;
+
+                    let mut to_close = None;
+                    for node in self.open_elems.iter().rev() {
+                        let name = self.sink.elem_name(node);
+                        let can_close = if list {
+                            close_list(name)
+                        } else {
+                            close_defn(name)
+                        };
+                        if can_close {
+                            to_close = Some(name.local.clone());
+                            break;
+                        }
+                        if extra_special(name) {
+                            break;
+                        }
+                    }
+
+                    match to_close {
+                        Some(name) => {
+                            self.generate_implied_end_except(name.clone());
+                            self.expect_to_close(name);
+                        }
+                        None => (),
+                    }
+
+                    self.close_p_element_in_button_scope();
+                    self.insert_element_for(tag);
+                    Done
+                }
+
+                tag @ <plaintext> => {
+                    self.close_p_element_in_button_scope();
+                    self.insert_element_for(tag);
+                    ToPlaintext
+                }
+
+                tag @ <button> => {
+                    if self.in_scope_named(default_scope, local_name!("button")) {
+                        self.sink.parse_error(Borrowed("nested buttons"));
+                        self.generate_implied_end(cursory_implied_end);
+                        self.pop_until_named(local_name!("button"));
+                    }
+                    self.reconstruct_formatting();
+                    self.insert_element_for(tag);
+                    self.frameset_ok = false;
+                    Done
+                }
+
+                tag @ </address> </article> </aside> </blockquote> </button> </center>
+                  </details> </dialog> </dir> </div> </dl> </fieldset> </figcaption>
+                  </figure> </footer> </header> </hgroup> </listing> </main> </menu>
+                  </nav> </ol> </pre> </section> </summary> </ul> => {
+                    if !self.in_scope_named(default_scope, tag.name.clone()) {
+                        self.unexpected(&tag);
+                    } else {
+                        self.generate_implied_end(cursory_implied_end);
+                        self.expect_to_close(tag.name);
+                    }
+                    Done
+                }
+
+                </form> => {
+                    if !self.in_html_elem_named(local_name!("template")) {
+                        // Can't use unwrap_or_return!() due to rust-lang/rust#16617.
+                        let node = match self.form_elem.take() {
+                            None => {
+                                self.sink.parse_error(Borrowed("Null form element pointer on </form>"));
+                                return Done;
+                            }
+                            Some(x) => x,
+                        };
+                        if !self.in_scope(default_scope, |n| self.sink.same_node(&node, &n)) {
+                            self.sink.parse_error(Borrowed("Form element not in scope on </form>"));
+                            return Done;
+                        }
+                        self.generate_implied_end(cursory_implied_end);
+                        let current = self.current_node().clone();
+                        self.remove_from_stack(&node);
+                        if !self.sink.same_node(&current, &node) {
+                            self.sink.parse_error(Borrowed("Bad open element on </form>"));
+                        }
+                    } else {
+                        if !self.in_scope_named(default_scope, local_name!("form")) {
+                            self.sink.parse_error(Borrowed("Form element not in scope on </form>"));
+                            return Done;
+                        }
+                        self.generate_implied_end(cursory_implied_end);
+                        if !self.current_node_named(local_name!("form")) {
+                            self.sink.parse_error(Borrowed("Bad open element on </form>"));
+                        }
+                        self.pop_until_named(local_name!("form"));
+                    }
+                    Done
+                }
+
+                </p> => {
+                    if !self.in_scope_named(button_scope, local_name!("p")) {
+                        self.sink.parse_error(Borrowed("No <p> tag to close"));
+                        self.insert_phantom(local_name!("p"));
+                    }
+                    self.close_p_element();
+                    Done
+                }
+
+                tag @ </li> </dd> </dt> => {
+                    let in_scope = if tag.name == local_name!("li") {
+                        self.in_scope_named(list_item_scope, tag.name.clone())
+                    } else {
+                        self.in_scope_named(default_scope, tag.name.clone())
+                    };
+                    if in_scope {
+                        self.generate_implied_end_except(tag.name.clone());
+                        self.expect_to_close(tag.name);
+                    } else {
+                        self.sink.parse_error(Borrowed("No matching tag to close"));
+                    }
+                    Done
+                }
+
+                tag @ </h1> </h2> </h3> </h4> </h5> </h6> => {
+                    if self.in_scope(default_scope, |n| self.elem_in(&n, heading_tag)) {
+                        self.generate_implied_end(cursory_implied_end);
+                        if !self.current_node_named(tag.name) {
+                            self.sink.parse_error(Borrowed("Closing wrong heading tag"));
+                        }
+                        self.pop_until(heading_tag);
+                    } else {
+                        self.sink.parse_error(Borrowed("No heading tag to close"));
+                    }
+                    Done
+                }
+
+                tag @ <a> => {
+                    self.handle_misnested_a_tags(&tag);
+                    self.reconstruct_formatting();
+                    self.create_formatting_element_for(tag);
+                    Done
+                }
+
+                tag @ <b> <big> <code> <em> <font> <i> <s> <small> <strike> <strong> <tt> <u> => {
+                    self.reconstruct_formatting();
+                    self.create_formatting_element_for(tag);
+                    Done
+                }
+
+                tag @ <nobr> => {
+                    self.reconstruct_formatting();
+                    if self.in_scope_named(default_scope, local_name!("nobr")) {
+                        self.sink.parse_error(Borrowed("Nested <nobr>"));
+                        self.adoption_agency(local_name!("nobr"));
+                        self.reconstruct_formatting();
+                    }
+                    self.create_formatting_element_for(tag);
+                    Done
+                }
+
+                tag @ </a> </b> </big> </code> </em> </font> </i> </nobr>
+                  </s> </small> </strike> </strong> </tt> </u> => {
+                    self.adoption_agency(tag.name);
+                    Done
+                }
+
+                tag @ <applet> <marquee> <object> => {
+                    self.reconstruct_formatting();
+                    self.insert_element_for(tag);
+                    self.active_formatting.push(Marker);
+                    self.frameset_ok = false;
+                    Done
+                }
+
+                tag @ </applet> </marquee> </object> => {
+                    if !self.in_scope_named(default_scope, tag.name.clone()) {
+                        self.unexpected(&tag);
+                    } else {
+                        self.generate_implied_end(cursory_implied_end);
+                        self.expect_to_close(tag.name);
+                        self.clear_active_formatting_to_marker();
+                    }
+                    Done
+                }
+
+                tag @ <table> => {
+                    if self.quirks_mode != Quirks {
+                        self.close_p_element_in_button_scope();
+                    }
+                    self.insert_element_for(tag);
+                    self.frameset_ok = false;
+                    self.mode = InTable;
+                    Done
+                }
+
+                tag @ </br> => {
+                    self.unexpected(&tag);
+                    self.step(InBody, TagToken(Tag {
+                        kind: StartTag,
+                        attrs: vec!(),
+                        ..tag
+                    }))
+                }
+
+                tag @ <area> <br> <embed> <img> <keygen> <wbr> <input> => {
+                    let keep_frameset_ok = match tag.name {
+                        local_name!("input") => self.is_type_hidden(&tag),
+                        _ => false,
+                    };
+                    self.reconstruct_formatting();
+                    self.insert_and_pop_element_for(tag);
+                    if !keep_frameset_ok {
+                        self.frameset_ok = false;
+                    }
+                    DoneAckSelfClosing
+                }
+
+                tag @ <param> <source> <track> => {
+                    self.insert_and_pop_element_for(tag);
+                    DoneAckSelfClosing
+                }
+
+                tag @ <hr> => {
+                    self.close_p_element_in_button_scope();
+                    self.insert_and_pop_element_for(tag);
+                    self.frameset_ok = false;
+                    DoneAckSelfClosing
+                }
+
+                tag @ <image> => {
+                    self.unexpected(&tag);
+                    self.step(InBody, TagToken(Tag {
+                        name: local_name!("img"),
+                        ..tag
+                    }))
+                }
+
+                tag @ <textarea> => {
+                    self.ignore_lf = true;
+                    self.frameset_ok = false;
+                    self.parse_raw_data(tag, Rcdata)
+                }
+
+                tag @ <xmp> => {
+                    self.close_p_element_in_button_scope();
+                    self.reconstruct_formatting();
+                    self.frameset_ok = false;
+                    self.parse_raw_data(tag, Rawtext)
+                }
+
+                tag @ <iframe> => {
+                    self.frameset_ok = false;
+                    self.parse_raw_data(tag, Rawtext)
+                }
+
+                tag @ <noembed> => {
+                    self.parse_raw_data(tag, Rawtext)
+                }
+
+                // <noscript> handled in wildcard case below
+
+                tag @ <select> => {
+                    self.reconstruct_formatting();
+                    self.insert_element_for(tag);
+                    self.frameset_ok = false;
+                    // NB: mode == InBody but possibly self.mode != mode, if
+                    // we're processing "as in the rules for InBody".
+                    self.mode = match self.mode {
+                        InTable | InCaption | InTableBody
+                            | InRow | InCell => InSelectInTable,
+                        _ => InSelect,
+                    };
+                    Done
+                }
+
+                tag @ <optgroup> <option> => {
+                    if self.current_node_named(local_name!("option")) {
+                        self.pop();
+                    }
+                    self.reconstruct_formatting();
+                    self.insert_element_for(tag);
+                    Done
+                }
+
+                tag @ <rb> <rtc> => {
+                    if self.in_scope_named(default_scope, local_name!("ruby")) {
+                        self.generate_implied_end(cursory_implied_end);
+                    }
+                    if !self.current_node_named(local_name!("ruby")) {
+                        self.unexpected(&tag);
+                    }
+                    self.insert_element_for(tag);
+                    Done
+                }
+
+                tag @ <rp> <rt> => {
+                    if self.in_scope_named(default_scope, local_name!("ruby")) {
+                        self.generate_implied_end_except(local_name!("rtc"));
+                    }
+                    if !self.current_node_named(local_name!("rtc")) && !self.current_node_named(local_name!("ruby")) {
+                        self.unexpected(&tag);
+                    }
+                    self.insert_element_for(tag);
+                    Done
+                }
+
+                tag @ <math> => self.enter_foreign(tag, ns!(mathml)),
+
+                tag @ <svg> => self.enter_foreign(tag, ns!(svg)),
+
+                <caption> <col> <colgroup> <frame> <head>
+                  <tbody> <td> <tfoot> <th> <thead> <tr> => {
+                    self.unexpected(&token);
+                    Done
+                }
+
+                tag @ <_> => {
+                    if self.opts.scripting_enabled && tag.name == local_name!("noscript") {
+                        self.parse_raw_data(tag, Rawtext)
+                    } else {
+                        self.reconstruct_formatting();
+                        self.insert_element_for(tag);
+                        Done
+                    }
+                }
+
+                tag @ </_> => {
+                    self.process_end_tag_in_body(tag);
+                    Done
+                }
+
+                // FIXME: This should be unreachable, but match_token requires a
+                // catch-all case.
+                _ => panic!("impossible case in InBody mode"),
+            }),
+
+            //§ parsing-main-incdata
+            Text => match_token!(token {
+                CharacterTokens(_, text) => self.append_text(text),
+
+                EOFToken => {
+                    self.unexpected(&token);
+                    if self.current_node_named(local_name!("script")) {
+                        let current = current_node(&self.open_elems);
+                        self.sink.mark_script_already_started(current);
+                    }
+                    self.pop();
+                    Reprocess(self.orig_mode.take().unwrap(), token)
+                }
+
+                tag @ </_> => {
+                    let node = self.pop();
+                    self.mode = self.orig_mode.take().unwrap();
+                    if tag.name == local_name!("script") {
+                        return Script(node);
+                    }
+                    Done
+                }
+
+                // The spec doesn't say what to do here.
+                // Other tokens are impossible?
+                _ => panic!("impossible case in Text mode"),
+            }),
+
+            //§ parsing-main-intable
+            InTable => match_token!(token {
+                // FIXME: hack, should implement pat | pat for match_token instead
+                NullCharacterToken => self.process_chars_in_table(token),
+
+                CharacterTokens(..) => self.process_chars_in_table(token),
+
+                CommentToken(text) => self.append_comment(text),
+
+                tag @ <caption> => {
+                    self.pop_until_current(table_scope);
+                    self.active_formatting.push(Marker);
+                    self.insert_element_for(tag);
+                    self.mode = InCaption;
+                    Done
+                }
+
+                tag @ <colgroup> => {
+                    self.pop_until_current(table_scope);
+                    self.insert_element_for(tag);
+                    self.mode = InColumnGroup;
+                    Done
+                }
+
+                <col> => {
+                    self.pop_until_current(table_scope);
+                    self.insert_phantom(local_name!("colgroup"));
+                    Reprocess(InColumnGroup, token)
+                }
+
+                tag @ <tbody> <tfoot> <thead> => {
+                    self.pop_until_current(table_scope);
+                    self.insert_element_for(tag);
+                    self.mode = InTableBody;
+                    Done
+                }
+
+                <td> <th> <tr> => {
+                    self.pop_until_current(table_scope);
+                    self.insert_phantom(local_name!("tbody"));
+                    Reprocess(InTableBody, token)
+                }
+
+                <table> => {
+                    self.unexpected(&token);
+                    if self.in_scope_named(table_scope, local_name!("table")) {
+                        self.pop_until_named(local_name!("table"));
+                        Reprocess(self.reset_insertion_mode(), token)
+                    } else {
+                        Done
+                    }
+                }
+
+                </table> => {
+                    if self.in_scope_named(table_scope, local_name!("table")) {
+                        self.pop_until_named(local_name!("table"));
+                        self.mode = self.reset_insertion_mode();
+                    } else {
+                        self.unexpected(&token);
+                    }
+                    Done
+                }
+
+                </body> </caption> </col> </colgroup> </html>
+                  </tbody> </td> </tfoot> </th> </thead> </tr> =>
+                    self.unexpected(&token),
+
+                <style> <script> <template> </template>
+                    => self.step(InHead, token),
+
+                tag @ <input> => {
+                    self.unexpected(&tag);
+                    if self.is_type_hidden(&tag) {
+                        self.insert_and_pop_element_for(tag);
+                        DoneAckSelfClosing
+                    } else {
+                        self.foster_parent_in_body(TagToken(tag))
+                    }
+                }
+
+                tag @ <form> => {
+                    self.unexpected(&tag);
+                    if !self.in_html_elem_named(local_name!("template")) && self.form_elem.is_none() {
+                        self.form_elem = Some(self.insert_and_pop_element_for(tag));
+                    }
+                    Done
+                }
+
+                EOFToken => self.step(InBody, token),
+
+                token => {
+                    self.unexpected(&token);
+                    self.foster_parent_in_body(token)
+                }
+            }),
+
+            //§ parsing-main-intabletext
+            InTableText => match_token!(token {
+                NullCharacterToken => self.unexpected(&token),
+
+                CharacterTokens(split, text) => {
+                    self.pending_table_text.push((split, text));
+                    Done
+                }
+
+                token => {
+                    let pending = replace(&mut self.pending_table_text, vec!());
+                    let contains_nonspace = pending.iter().any(|&(split, ref text)| {
+                        match split {
+                            Whitespace => false,
+                            NotWhitespace => true,
+                            NotSplit => any_not_whitespace(text),
+                        }
+                    });
+
+                    if contains_nonspace {
+                        self.sink.parse_error(Borrowed("Non-space table text"));
+                        for (split, text) in pending.into_iter() {
+                            match self.foster_parent_in_body(CharacterTokens(split, text)) {
+                                Done => (),
+                                _ => panic!("not prepared to handle this!"),
+                            }
+                        }
+                    } else {
+                        for (_, text) in pending.into_iter() {
+                            self.append_text(text);
+                        }
+                    }
+
+                    Reprocess(self.orig_mode.take().unwrap(), token)
+                }
+            }),
+
+            //§ parsing-main-incaption
+            InCaption => match_token!(token {
+                tag @ <caption> <col> <colgroup> <tbody> <td> <tfoot>
+                  <th> <thead> <tr> </table> </caption> => {
+                    if self.in_scope_named(table_scope, local_name!("caption")) {
+                        self.generate_implied_end(cursory_implied_end);
+                        self.expect_to_close(local_name!("caption"));
+                        self.clear_active_formatting_to_marker();
+                        match tag {
+                            Tag { kind: EndTag, name: local_name!("caption"), .. } => {
+                                self.mode = InTable;
+                                Done
+                            }
+                            _ => Reprocess(InTable, TagToken(tag))
+                        }
+                    } else {
+                        self.unexpected(&tag);
+                        Done
+                    }
+                }
+
+                </body> </col> </colgroup> </html> </tbody>
+                  </td> </tfoot> </th> </thead> </tr> => self.unexpected(&token),
+
+                token => self.step(InBody, token),
+            }),
+
+            //§ parsing-main-incolgroup
+            InColumnGroup => match_token!(token {
+                CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+                CharacterTokens(Whitespace, text) => self.append_text(text),
+                CommentToken(text) => self.append_comment(text),
+
+                <html> => self.step(InBody, token),
+
+                tag @ <col> => {
+                    self.insert_and_pop_element_for(tag);
+                    DoneAckSelfClosing
+                }
+
+                </colgroup> => {
+                    if self.current_node_named(local_name!("colgroup")) {
+                        self.pop();
+                        self.mode = InTable;
+                    } else {
+                        self.unexpected(&token);
+                    }
+                    Done
+                }
+
+                </col> => self.unexpected(&token),
+
+                <template> </template> => self.step(InHead, token),
+
+                EOFToken => self.step(InBody, token),
+
+                token => {
+                    if self.current_node_named(local_name!("colgroup")) {
+                        self.pop();
+                        Reprocess(InTable, token)
+                    } else {
+                        self.unexpected(&token)
+                    }
+                }
+            }),
+
+            //§ parsing-main-intbody
+            InTableBody => match_token!(token {
+                tag @ <tr> => {
+                    self.pop_until_current(table_body_context);
+                    self.insert_element_for(tag);
+                    self.mode = InRow;
+                    Done
+                }
+
+                <th> <td> => {
+                    self.unexpected(&token);
+                    self.pop_until_current(table_body_context);
+                    self.insert_phantom(local_name!("tr"));
+                    Reprocess(InRow, token)
+                }
+
+                tag @ </tbody> </tfoot> </thead> => {
+                    if self.in_scope_named(table_scope, tag.name.clone()) {
+                        self.pop_until_current(table_body_context);
+                        self.pop();
+                        self.mode = InTable;
+                    } else {
+                        self.unexpected(&tag);
+                    }
+                    Done
+                }
+
+                <caption> <col> <colgroup> <tbody> <tfoot> <thead> </table> => {
+                    declare_tag_set!(table_outer = "table" "tbody" "tfoot");
+                    if self.in_scope(table_scope, |e| self.elem_in(&e, table_outer)) {
+                        self.pop_until_current(table_body_context);
+                        self.pop();
+                        Reprocess(InTable, token)
+                    } else {
+                        self.unexpected(&token)
+                    }
+                }
+
+                </body> </caption> </col> </colgroup> </html> </td> </th> </tr>
+                    => self.unexpected(&token),
+
+                token => self.step(InTable, token),
+            }),
+
+            //§ parsing-main-intr
+            InRow => match_token!(token {
+                tag @ <th> <td> => {
+                    self.pop_until_current(table_row_context);
+                    self.insert_element_for(tag);
+                    self.mode = InCell;
+                    self.active_formatting.push(Marker);
+                    Done
+                }
+
+                </tr> => {
+                    if self.in_scope_named(table_scope, local_name!("tr")) {
+                        self.pop_until_current(table_row_context);
+                        let node = self.pop();
+                        self.assert_named(&node, local_name!("tr"));
+                        self.mode = InTableBody;
+                    } else {
+                        self.unexpected(&token);
+                    }
+                    Done
+                }
+
+                <caption> <col> <colgroup> <tbody> <tfoot> <thead> <tr> </table> => {
+                    if self.in_scope_named(table_scope, local_name!("tr")) {
+                        self.pop_until_current(table_row_context);
+                        let node = self.pop();
+                        self.assert_named(&node, local_name!("tr"));
+                        Reprocess(InTableBody, token)
+                    } else {
+                        self.unexpected(&token)
+                    }
+                }
+
+                tag @ </tbody> </tfoot> </thead> => {
+                    if self.in_scope_named(table_scope, tag.name.clone()) {
+                        if self.in_scope_named(table_scope, local_name!("tr")) {
+                            self.pop_until_current(table_row_context);
+                            let node = self.pop();
+                            self.assert_named(&node, local_name!("tr"));
+                            Reprocess(InTableBody, TagToken(tag))
+                        } else {
+                            Done
+                        }
+                    } else {
+                        self.unexpected(&tag)
+                    }
+                }
+
+                </body> </caption> </col> </colgroup> </html> </td> </th>
+                    => self.unexpected(&token),
+
+                token => self.step(InTable, token),
+            }),
+
+            //§ parsing-main-intd
+            InCell => match_token!(token {
+                tag @ </td> </th> => {
+                    if self.in_scope_named(table_scope, tag.name.clone()) {
+                        self.generate_implied_end(cursory_implied_end);
+                        self.expect_to_close(tag.name);
+                        self.clear_active_formatting_to_marker();
+                        self.mode = InRow;
+                    } else {
+                        self.unexpected(&tag);
+                    }
+                    Done
+                }
+
+                <caption> <col> <colgroup> <tbody> <td> <tfoot> <th> <thead> <tr> => {
+                    if self.in_scope(table_scope, |n| self.elem_in(&n, td_th)) {
+                        self.close_the_cell();
+                        Reprocess(InRow, token)
+                    } else {
+                        self.unexpected(&token)
+                    }
+                }
+
+                </body> </caption> </col> </colgroup> </html>
+                    => self.unexpected(&token),
+
+                tag @ </table> </tbody> </tfoot> </thead> </tr> => {
+                    if self.in_scope_named(table_scope, tag.name.clone()) {
+                        self.close_the_cell();
+                        Reprocess(InRow, TagToken(tag))
+                    } else {
+                        self.unexpected(&tag)
+                    }
+                }
+
+                token => self.step(InBody, token),
+            }),
+
+            //§ parsing-main-inselect
+            InSelect => match_token!(token {
+                NullCharacterToken => self.unexpected(&token),
+                CharacterTokens(_, text) => self.append_text(text),
+                CommentToken(text) => self.append_comment(text),
+
+                <html> => self.step(InBody, token),
+
+                tag @ <option> => {
+                    if self.current_node_named(local_name!("option")) {
+                        self.pop();
+                    }
+                    self.insert_element_for(tag);
+                    Done
+                }
+
+                tag @ <optgroup> => {
+                    if self.current_node_named(local_name!("option")) {
+                        self.pop();
+                    }
+                    if self.current_node_named(local_name!("optgroup")) {
+                        self.pop();
+                    }
+                    self.insert_element_for(tag);
+                    Done
+                }
+
+                </optgroup> => {
+                    if self.open_elems.len() >= 2
+                        && self.current_node_named(local_name!("option"))
+                        && self.html_elem_named(&self.open_elems[self.open_elems.len() - 2],
+                            local_name!("optgroup")) {
+                        self.pop();
+                    }
+                    if self.current_node_named(local_name!("optgroup")) {
+                        self.pop();
+                    } else {
+                        self.unexpected(&token);
+                    }
+                    Done
+                }
+
+                </option> => {
+                    if self.current_node_named(local_name!("option")) {
+                        self.pop();
+                    } else {
+                        self.unexpected(&token);
+                    }
+                    Done
+                }
+
+                tag @ <select> </select> => {
+                    let in_scope = self.in_scope_named(select_scope, local_name!("select"));
+
+                    if !in_scope || tag.kind == StartTag {
+                        self.unexpected(&tag);
+                    }
+
+                    if in_scope {
+                        self.pop_until_named(local_name!("select"));
+                        self.mode = self.reset_insertion_mode();
+                    }
+                    Done
+                }
+
+                <input> <keygen> <textarea> => {
+                    self.unexpected(&token);
+                    if self.in_scope_named(select_scope, local_name!("select")) {
+                        self.pop_until_named(local_name!("select"));
+                        Reprocess(self.reset_insertion_mode(), token)
+                    } else {
+                        Done
+                    }
+                }
+
+                <script> <template> </template> => self.step(InHead, token),
+
+                EOFToken => self.step(InBody, token),
+
+                token => self.unexpected(&token),
+            }),
+
+            //§ parsing-main-inselectintable
+            InSelectInTable => match_token!(token {
+                <caption> <table> <tbody> <tfoot> <thead> <tr> <td> <th> => {
+                    self.unexpected(&token);
+                    self.pop_until_named(local_name!("select"));
+                    Reprocess(self.reset_insertion_mode(), token)
+                }
+
+                tag @ </caption> </table> </tbody> </tfoot> </thead> </tr> </td> </th> => {
+                    self.unexpected(&tag);
+                    if self.in_scope_named(table_scope, tag.name.clone()) {
+                        self.pop_until_named(local_name!("select"));
+                        Reprocess(self.reset_insertion_mode(), TagToken(tag))
+                    } else {
+                        Done
+                    }
+                }
+
+                token => self.step(InSelect, token),
+            }),
+
+            //§ parsing-main-intemplate
+            InTemplate => match_token!(token {
+                CharacterTokens(_, _) => self.step(InBody, token),
+                CommentToken(_) => self.step(InBody, token),
+
+                <base> <basefont> <bgsound> <link> <meta> <noframes> <script>
+                <style> <template> <title> </template> => {
+                    self.step(InHead, token)
+                }
+
+                <caption> <colgroup> <tbody> <tfoot> <thead> => {
+                    self.template_modes.pop();
+                    self.template_modes.push(InTable);
+                    Reprocess(InTable, token)
+                }
+
+                <col> => {
+                    self.template_modes.pop();
+                    self.template_modes.push(InColumnGroup);
+                    Reprocess(InColumnGroup, token)
+                }
+
+                <tr> => {
+                    self.template_modes.pop();
+                    self.template_modes.push(InTableBody);
+                    Reprocess(InTableBody, token)
+                }
+
+                <td> <th> => {
+                    self.template_modes.pop();
+                    self.template_modes.push(InRow);
+                    Reprocess(InRow, token)
+                }
+
+                EOFToken => {
+                    if !self.in_html_elem_named(local_name!("template")) {
+                        self.stop_parsing()
+                    } else {
+                        self.unexpected(&token);
+                        self.pop_until_named(local_name!("template"));
+                        self.clear_active_formatting_to_marker();
+                        self.template_modes.pop();
+                        self.mode = self.reset_insertion_mode();
+                        Reprocess(self.reset_insertion_mode(), token)
+                    }
+                }
+
+                tag @ <_> => {
+                    self.template_modes.pop();
+                    self.template_modes.push(InBody);
+                    Reprocess(InBody, TagToken(tag))
+                }
+
+                token => self.unexpected(&token),
+            }),
+
+            //§ parsing-main-afterbody
+            AfterBody => match_token!(token {
+                CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+                CharacterTokens(Whitespace, _) => self.step(InBody, token),
+                CommentToken(text) => self.append_comment_to_html(text),
+
+                <html> => self.step(InBody, token),
+
+                </html> => {
+                    if self.is_fragment() {
+                        self.unexpected(&token);
+                    } else {
+                        self.mode = AfterAfterBody;
+                    }
+                    Done
+                }
+
+                EOFToken => self.stop_parsing(),
+
+                token => {
+                    self.unexpected(&token);
+                    Reprocess(InBody, token)
+                }
+            }),
+
+            //§ parsing-main-inframeset
+            InFrameset => match_token!(token {
+                CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+                CharacterTokens(Whitespace, text) => self.append_text(text),
+                CommentToken(text) => self.append_comment(text),
+
+                <html> => self.step(InBody, token),
+
+                tag @ <frameset> => {
+                    self.insert_element_for(tag);
+                    Done
+                }
+
+                </frameset> => {
+                    if self.open_elems.len() == 1 {
+                        self.unexpected(&token);
+                    } else {
+                        self.pop();
+                        if !self.is_fragment() && !self.current_node_named(local_name!("frameset")) {
+                            self.mode = AfterFrameset;
+                        }
+                    }
+                    Done
+                }
+
+                tag @ <frame> => {
+                    self.insert_and_pop_element_for(tag);
+                    DoneAckSelfClosing
+                }
+
+                <noframes> => self.step(InHead, token),
+
+                EOFToken => {
+                    if self.open_elems.len() != 1 {
+                        self.unexpected(&token);
+                    }
+                    self.stop_parsing()
+                }
+
+                token => self.unexpected(&token),
+            }),
+
+            //§ parsing-main-afterframeset
+            AfterFrameset => match_token!(token {
+                CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+                CharacterTokens(Whitespace, text) => self.append_text(text),
+                CommentToken(text) => self.append_comment(text),
+
+                <html> => self.step(InBody, token),
+
+                </html> => {
+                    self.mode = AfterAfterFrameset;
+                    Done
+                }
+
+                <noframes> => self.step(InHead, token),
+
+                EOFToken => self.stop_parsing(),
+
+                token => self.unexpected(&token),
+            }),
+
+            //§ the-after-after-body-insertion-mode
+            AfterAfterBody => match_token!(token {
+                CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+                CharacterTokens(Whitespace, _) => self.step(InBody, token),
+                CommentToken(text) => self.append_comment_to_doc(text),
+
+                <html> => self.step(InBody, token),
+
+                EOFToken => self.stop_parsing(),
+
+                token => {
+                    self.unexpected(&token);
+                    Reprocess(InBody, token)
+                }
+            }),
+
+            //§ the-after-after-frameset-insertion-mode
+            AfterAfterFrameset => match_token!(token {
+                CharacterTokens(NotSplit, text) => SplitWhitespace(text),
+                CharacterTokens(Whitespace, _) => self.step(InBody, token),
+                CommentToken(text) => self.append_comment_to_doc(text),
+
+                <html> => self.step(InBody, token),
+
+                EOFToken => self.stop_parsing(),
+
+                <noframes> => self.step(InHead, token),
+
+                token => self.unexpected(&token),
+            }),
+            //§ END
+        }
+    }
+
+    /// Processes one token with the foreign-content rules and returns the
+    /// resulting `ProcessResult`.
+    ///
+    /// Character data and comments are appended, start tags are routed to
+    /// `foreign_start_tag` or `unexpected_start_tag_in_foreign_content`, and
+    /// end tags are matched against the stack of open elements.
+    ///
+    /// NOTE(review): the end-tag arm computes `self.open_elems.len() - 1`, so it
+    /// relies on `open_elems` being non-empty when this is called -- confirm
+    /// against the caller.
+    fn step_foreign(&mut self, token: Token) -> ProcessResult<Handle> {
+        match_token!(token {
+            // A NUL is reported as unexpected and inserted as U+FFFD.
+            NullCharacterToken => {
+                self.unexpected(&token);
+                self.append_text("\u{fffd}".to_tendril())
+            }
+
+            // Text containing anything other than whitespace clears
+            // `frameset_ok` before being appended.
+            CharacterTokens(_, text) => {
+                if any_not_whitespace(&text) {
+                    self.frameset_ok = false;
+                }
+                self.append_text(text)
+            }
+
+            CommentToken(text) => self.append_comment(text),
+
+            // These start tags are always handed to
+            // `unexpected_start_tag_in_foreign_content`.
+            tag @ <b> <big> <blockquote> <body> <br> <center> <code> <dd> <div> <dl>
+                <dt> <em> <embed> <h1> <h2> <h3> <h4> <h5> <h6> <head> <hr> <i>
+                <img> <li> <listing> <menu> <meta> <nobr> <ol> <p> <pre> <ruby>
+                <s> <small> <span> <strong> <strike> <sub> <sup> <table> <tt>
+                <u> <ul> <var> => self.unexpected_start_tag_in_foreign_content(tag),
+
+            // `<font>` gets the same treatment only when it carries a `color`,
+            // `face` or `size` attribute (empty namespace); otherwise it is an
+            // ordinary foreign start tag.
+            tag @ <font> => {
+                let unexpected = tag.attrs.iter().any(|attr| {
+                    matches!(attr.name.expanded(),
+                             expanded_name!("", "color") |
+                             expanded_name!("", "face") |
+                             expanded_name!("", "size"))
+                });
+                if unexpected {
+                    self.unexpected_start_tag_in_foreign_content(tag)
+                } else {
+                    self.foreign_start_tag(tag)
+                }
+            }
+
+            tag @ <_> => self.foreign_start_tag(tag),
+
+            // FIXME(#118): </script> in SVG
+
+            // Any end tag: walk the stack of open elements from the last entry
+            // towards index 0, looking for an element whose local name matches
+            // the tag name (ASCII case-insensitively).
+            tag @ </_> => {
+                // `first` is true only while inspecting the starting (last) entry.
+                let mut first = true;
+                let mut stack_idx = self.open_elems.len() - 1;
+                loop {
+                    // Index 0 is never examined or removed; reaching it ends
+                    // processing of this token.
+                    if stack_idx == 0 {
+                        return Done;
+                    }
+
+                    let html;
+                    let eq;
+                    {
+                        // Inner scope limits how long the element name is held.
+                        let node_name = self.sink.elem_name(&self.open_elems[stack_idx]);
+                        html = *node_name.ns == ns!(html);
+                        eq = node_name.local.eq_ignore_ascii_case(&tag.name);
+                    }
+                    // After stepping past the starting entry, an HTML-namespace
+                    // element means the token is processed with the current
+                    // insertion mode's rules instead.
+                    if !first && html {
+                        let mode = self.mode;
+                        return self.step(mode, TagToken(tag));
+                    }
+
+                    // Match found: shorten the stack to `stack_idx` entries
+                    // (assuming `open_elems` is a `Vec`, this drops the matching
+                    // entry and everything after it).
+                    if eq {
+                        self.open_elems.truncate(stack_idx);
+                        return Done;
+                    }
+
+                    // The starting entry did not match: report the tag once.
+                    if first {
+                        self.unexpected(&tag);
+                        first = false;
+                    }
+                    stack_idx -= 1;
+                }
+            }
+
+            // FIXME: This should be unreachable, but match_token requires a
+            // catch-all case.
+            _ => panic!("impossible case in foreign content"),
+        })
+    }
+}
diff --git a/src/tree_builder/tag_sets.rs b/src/tree_builder/tag_sets.rs
new file mode 100644
index 0000000..377b34c
--- /dev/null
+++ b/src/tree_builder/tag_sets.rs
@@ -0,0 +1,115 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Various sets of HTML tag names, and macros for declaring them.
+
+use crate::ExpandedName;
+use mac::{_tt_as_expr_hack, matches};
+use markup5ever::{expanded_name, local_name, namespace_prefix, namespace_url, ns};
+
+// Expands to a `match` on `$param`: each listed tag, taken as an expanded name
+// in the HTML namespace, evaluates to the boolean `$b`; every other value is
+// delegated to the function `$supr`.
+macro_rules! declare_tag_set_impl ( ($param:ident, $b:ident, $supr:ident, $($tag:tt)+) => (
+    match $param {
+        $( expanded_name!(html $tag) => $b, )+
+        p => $supr(p),
+    }
+));
+
+// Translates the three set-definition forms into `declare_tag_set_impl!`:
+//   `[base] - tags...`  listed tags are excluded (`false`), the rest defer to `base`;
+//   `[base] + tags...`  listed tags are included (`true`), the rest defer to `base`;
+//   `tags...`           listed tags are included, the rest defer to `empty_set`.
+macro_rules! declare_tag_set_body (
+    ($param:ident = [$supr:ident] - $($tag:tt)+)
+        => ( declare_tag_set_impl!($param, false, $supr, $($tag)+) );
+
+    ($param:ident = [$supr:ident] + $($tag:tt)+)
+        => ( declare_tag_set_impl!($param, true, $supr, $($tag)+) );
+
+    ($param:ident = $($tag:tt)+)
+        => ( declare_tag_set_impl!($param, true, empty_set, $($tag)+) );
+);
+
+// Declares a tag-set predicate `fn $name(p: crate::ExpandedName) -> bool` whose
+// body is produced by `declare_tag_set_body!`. The first rule emits a `pub`
+// function, the second a private one.
+macro_rules! declare_tag_set (
+    (pub $name:ident = $($toks:tt)+) => (
+        pub fn $name(p: crate::ExpandedName) -> bool {
+            declare_tag_set_body!(p = $($toks)+)
+        }
+    );
+
+    ($name:ident = $($toks:tt)+) => (
+        fn $name(p: crate::ExpandedName) -> bool {
+            declare_tag_set_body!(p = $($toks)+)
+        }
+    );
+);
+
+/// The tag set containing no names: returns `false` for every argument.
+///
+/// `declare_tag_set_body!` uses it as the fallback for sets that list their
+/// members without naming a base set.
+#[inline(always)]
+pub fn empty_set(_: ExpandedName) -> bool {
+    false
+}
+/// The tag set containing every name: returns `true` for every argument.
+///
+/// Used as the base of sets defined by exclusion, such as `select_scope`.
+#[inline(always)]
+pub fn full_set(_: ExpandedName) -> bool {
+    true
+}
+
+// The HTML-namespace part of the default scope; `default_scope` combines it
+// with the MathML and SVG integration-point checks.
+declare_tag_set!(pub html_default_scope =
+    "applet" "caption" "html" "table" "td" "th" "marquee" "object" "template");
+
+/// Default scope predicate: true when `name` is in `html_default_scope`, is a
+/// MathML text integration point, or is an SVG HTML integration point.
+///
+/// The checks run in that order and stop at the first one that succeeds.
+#[inline(always)]
+pub fn default_scope(name: ExpandedName) -> bool {
+    if html_default_scope(name) {
+        return true;
+    }
+    if mathml_text_integration_point(name) {
+        return true;
+    }
+    svg_html_integration_point(name)
+}
+
+// Scope sets. `[base] + ...` extends `base` with the listed HTML tags;
+// `[base] - ...` removes the listed HTML tags from `base`.
+// `list_item_scope` and `button_scope` extend `default_scope`; `table_scope`
+// is exactly the three listed tags; `select_scope` is every name except HTML
+// `optgroup` and `option`.
+declare_tag_set!(pub list_item_scope = [default_scope] + "ol" "ul");
+declare_tag_set!(pub button_scope = [default_scope] + "button");
+declare_tag_set!(pub table_scope = "html" "table" "template");
+declare_tag_set!(pub select_scope = [full_set] - "optgroup" "option");
+
+// Plain sets of HTML tags related to table sections, rows and cells.
+declare_tag_set!(pub table_body_context = "tbody" "tfoot" "thead" "template" "html");
+declare_tag_set!(pub table_row_context = "tr" "template" "html");
+declare_tag_set!(pub td_th = "td" "th");
+
+// `thorough_implied_end` is `cursory_implied_end` plus the table-related tags
+// listed below it.
+declare_tag_set!(pub cursory_implied_end =
+    "dd" "dt" "li" "option" "optgroup" "p" "rb" "rp" "rt" "rtc");
+
+declare_tag_set!(pub thorough_implied_end = [cursory_implied_end]
+    + "caption" "colgroup" "tbody" "td" "tfoot" "th" "thead" "tr");
+
+// The six HTML heading tags.
+declare_tag_set!(pub heading_tag = "h1" "h2" "h3" "h4" "h5" "h6");
+
+// NOTE(review): presumably the "special" element category of the HTML parsing
+// specification -- confirm the list against the spec revision being targeted.
+declare_tag_set!(pub special_tag =
+    "address" "applet" "area" "article" "aside" "base" "basefont" "bgsound" "blockquote" "body"
+    "br" "button" "caption" "center" "col" "colgroup" "dd" "details" "dir" "div" "dl" "dt" "embed"
+    "fieldset" "figcaption" "figure" "footer" "form" "frame" "frameset" "h1" "h2" "h3" "h4" "h5"
+    "h6" "head" "header" "hgroup" "hr" "html" "iframe" "img" "input" "isindex" "li" "link"
+    "listing" "main" "marquee" "menu" "meta" "nav" "noembed" "noframes" "noscript"
+    "object" "ol" "p" "param" "plaintext" "pre" "script" "section" "select" "source" "style"
+    "summary" "table" "tbody" "td" "template" "textarea" "tfoot" "th" "thead" "title" "tr" "track"
+    "ul" "wbr" "xmp");
+//§ END
+
+/// Returns true when `p` is one of the MathML-namespace elements `mi`, `mo`,
+/// `mn`, `ms` or `mtext`.
+pub fn mathml_text_integration_point(p: ExpandedName) -> bool {
+    matches!(
+        p,
+        expanded_name!(mathml "mi") |
+            expanded_name!(mathml "mo") |
+            expanded_name!(mathml "mn") |
+            expanded_name!(mathml "ms") |
+            expanded_name!(mathml "mtext")
+    )
+}
+
+/// Returns true when `p` is one of the SVG-namespace elements `foreignObject`,
+/// `desc` or `title`.
+///
+/// See https://html.spec.whatwg.org/multipage/#html-integration-point
+pub fn svg_html_integration_point(p: ExpandedName) -> bool {
+    // `annotation-xml` is handled in another place.
+    matches!(
+        p,
+        expanded_name!(svg "foreignObject") |
+            expanded_name!(svg "desc") |
+            expanded_name!(svg "title")
+    )
+}
diff --git a/src/tree_builder/types.rs b/src/tree_builder/types.rs
new file mode 100644
index 0000000..e47d69b
--- /dev/null
+++ b/src/tree_builder/types.rs
@@ -0,0 +1,95 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Types used within the tree builder code.  Not exported to users.
+
+use crate::tokenizer::states::RawKind;
+use crate::tokenizer::Tag;
+
+use crate::tendril::StrTendril;
+
+pub use self::FormatEntry::*;
+pub use self::InsertionMode::*;
+pub use self::InsertionPoint::*;
+pub use self::ProcessResult::*;
+pub use self::SplitStatus::*;
+pub use self::Token::*;
+
+/// The insertion modes of the tree builder.
+///
+/// The builder's `step` function dispatches on one of these values; the
+/// current mode is kept in the builder's `mode` field and template handling
+/// pushes and pops modes on `template_modes`.
+#[derive(PartialEq, Eq, Copy, Clone, Debug)]
+pub enum InsertionMode {
+    Initial,
+    BeforeHtml,
+    BeforeHead,
+    InHead,
+    InHeadNoscript,
+    AfterHead,
+    InBody,
+    Text,
+    InTable,
+    InTableText,
+    InCaption,
+    InColumnGroup,
+    InTableBody,
+    InRow,
+    InCell,
+    InSelect,
+    InSelectInTable,
+    InTemplate,
+    AfterBody,
+    InFrameset,
+    AfterFrameset,
+    AfterAfterBody,
+    AfterAfterFrameset,
+}
+
+/// Classification attached to a `CharacterTokens` token.
+///
+/// NOTE(review): presumably records whether the text has already been split
+/// into whitespace and non-whitespace runs -- confirm against the code that
+/// handles `ProcessResult::SplitWhitespace`.
+#[derive(PartialEq, Eq, Copy, Clone, Debug)]
+pub enum SplitStatus {
+    /// Tree-builder rules that care about whitespace answer such a token with
+    /// `SplitWhitespace(text)`.
+    NotSplit,
+    /// Matched by the rules that handle whitespace-only text.
+    Whitespace,
+    NotWhitespace,
+}
+
+/// A subset/refinement of `tokenizer::Token`.  Everything else is handled
+/// specially at the beginning of `process_token`.
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub enum Token {
+    /// A tag as produced by the tokenizer (`tokenizer::Tag`).
+    TagToken(Tag),
+    /// A comment and its text.
+    CommentToken(StrTendril),
+    /// A run of character data together with its `SplitStatus`.
+    CharacterTokens(SplitStatus, StrTendril),
+    /// A NUL character; the foreign-content rules replace it with U+FFFD.
+    NullCharacterToken,
+    /// End of input.
+    EOFToken,
+}
+
+/// Result of processing one token in the tree builder, as returned by `step`
+/// and `step_foreign`.
+pub enum ProcessResult<Handle> {
+    /// The token has been fully handled.
+    Done,
+    /// The token has been handled; returned by rules such as the `<frame>`
+    /// start tag. NOTE(review): presumably also acknowledges the tag's
+    /// self-closing flag -- confirm in the caller.
+    DoneAckSelfClosing,
+    /// Returned for `CharacterTokens(NotSplit, text)`, carrying the text back
+    /// to the caller.
+    SplitWhitespace(StrTendril),
+    /// Carries an insertion mode and a token; presumably the caller processes
+    /// the token again under that mode.
+    Reprocess(InsertionMode, Token),
+    ReprocessForeign(Token),
+    Script(Handle),
+    ToPlaintext,
+    ToRawData(RawKind),
+}
+
+/// Either an element (its handle plus the tag it was created from) or a marker.
+///
+/// NOTE(review): presumably an entry of the list of active formatting elements
+/// (cf. `clear_active_formatting_to_marker`) -- confirm where it is stored.
+pub enum FormatEntry<Handle> {
+    Element(Handle, Tag),
+    Marker,
+}
+
+/// Describes where a node is to be inserted relative to existing nodes.
+pub enum InsertionPoint<Handle> {
+    /// Insert as last child in this parent.
+    LastChild(Handle),
+    /// Insert before this following sibling.
+    BeforeSibling(Handle),
+    /// Insertion point is decided based on existence of element's parent node.
+    TableFosterParenting {
+        element: Handle,
+        prev_element: Handle,
+    },
+}
diff --git a/src/util/str.rs b/src/util/str.rs
new file mode 100644
index 0000000..b2eb41a
--- /dev/null
+++ b/src/util/str.rs
@@ -0,0 +1,60 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use mac::{_tt_as_expr_hack, matches};
+use std::fmt;
+
+/// Renders `x` with its `Debug` formatting and escapes every character of that
+/// rendering with `char::escape_default`.
+///
+/// The escaped text is produced in a single pass: the formatter writes into an
+/// adapter that escapes each chunk as it arrives, so no intermediate unescaped
+/// `String` is allocated.
+///
+/// # Panics
+///
+/// Panics if the `Debug` implementation of `T` returns an error, exactly as
+/// `format!` does.
+pub fn to_escaped_string<T: fmt::Debug>(x: &T) -> String {
+    // `fmt::Write` sink that stores the escaped form of whatever is written.
+    struct Escaping(String);
+
+    impl fmt::Write for Escaping {
+        fn write_str(&mut self, s: &str) -> fmt::Result {
+            self.0.extend(s.chars().flat_map(|c| c.escape_default()));
+            Ok(())
+        }
+    }
+
+    let mut out = Escaping(String::new());
+    fmt::write(&mut out, format_args!("{:?}", x))
+        .expect("a formatting trait implementation returned an error");
+    out.0
+}
+
+/// Maps an ASCII letter to its lowercase form.
+///
+/// Returns `Some` with the lowercase letter when `c` is in `a..=z` or `A..=Z`,
+/// and `None` for every other character (digits, symbols, non-ASCII).
+pub fn lower_ascii_letter(c: char) -> Option<char> {
+    if c.is_ascii_alphabetic() {
+        Some(c.to_ascii_lowercase())
+    } else {
+        None
+    }
+}
+
+/// Reports whether `c` is an ASCII digit (`0-9`) or an ASCII letter
+/// (`a-z`, `A-Z`).
+pub fn is_ascii_alnum(c: char) -> bool {
+    c.is_ascii_alphanumeric()
+}
+
+/// Reports whether `c` is one of the ASCII whitespace characters that the
+/// tree construction modes treat specially: tab, line feed, form feed,
+/// carriage return or space.
+pub fn is_ascii_whitespace(c: char) -> bool {
+    // Alternatives are listed in code-point order.
+    matches!(c, '\t' | '\n' | '\x0C' | '\r' | ' ')
+}
+
+// Unit tests for the ASCII helpers. Each `test_eq!` invocation takes a test
+// name, an expression and its expected value; the mixed-case names are why
+// `non_snake_case` is allowed here.
+// NOTE(review): `is_ascii_whitespace` and `to_escaped_string` are not covered.
+#[cfg(test)]
+#[allow(non_snake_case)]
+mod test {
+    use super::{is_ascii_alnum, lower_ascii_letter};
+    use mac::test_eq;
+
+    test_eq!(lower_letter_a_is_a, lower_ascii_letter('a'), Some('a'));
+    test_eq!(lower_letter_A_is_a, lower_ascii_letter('A'), Some('a'));
+    test_eq!(lower_letter_symbol_is_None, lower_ascii_letter('!'), None);
+    test_eq!(
+        lower_letter_nonascii_is_None,
+        lower_ascii_letter('\u{a66e}'),
+        None
+    );
+
+    test_eq!(is_alnum_a, is_ascii_alnum('a'), true);
+    test_eq!(is_alnum_A, is_ascii_alnum('A'), true);
+    test_eq!(is_alnum_1, is_ascii_alnum('1'), true);
+    test_eq!(is_not_alnum_symbol, is_ascii_alnum('!'), false);
+    test_eq!(is_not_alnum_nonascii, is_ascii_alnum('\u{a66e}'), false);
+}
author	Martin Fischer <martin@push-f.com>	2021-04-08 08:42:01 +0200
committer	Martin Fischer <martin@push-f.com>	2021-04-08 15:40:37 +0200
commit	57e7eefcbe6fb8c3dc4b01c707be9de4c34963a7 (patch)
tree	6a9d296389bf3023396592c8514ed6712e011c7f /src