// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! The HTML5 tokenizer.
use self::error::InternalState;
pub use self::interface::{Attribute, Doctype, Tag, TagKind, Token};
use self::interface::{CharacterTokens, EOFToken, NullCharacterToken, ParseError};
use self::interface::{CommentToken, DoctypeToken, EndTag, StartTag, TagToken};
pub use self::interface::{TokenSink, TokenSinkResult};
use self::states::{DoctypeIdKind, Public, System};
use self::states::{DoubleEscaped, Escaped};
use self::states::{DoubleQuoted, SingleQuoted, Unquoted};
use self::states::{Rawtext, Rcdata, ScriptData, ScriptDataEscaped};
use self::char_ref::{CharRef, CharRefTokenizer};
use crate::error::Error;
use crate::util::{smallcharset::SmallCharSet, str::lower_ascii_letter};
use std::collections::BTreeMap;
use std::default::Default;
use std::mem::replace;
use std::ops::ControlFlow;
pub use crate::util::buffer_queue::BufferQueue;
use crate::util::buffer_queue::{FromSet, NotFromSet, SetResult};
pub use states::RawKind;
mod char_ref;
pub mod error;
mod interface;
mod states;
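/// The result of one step of the state machine: `Suspend` means we ran out
/// of input, `Break` means the sink asked the tokenizer to stop.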
pub enum ProcessResult {
Suspend,
Break,
}
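/// The result of feeding a chunk of input: `Done` means all buffered input
/// was processed, `Break` means the sink asked the tokenizer to stop early.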
#[must_use]
#[derive(Debug, PartialEq, Eq)]
pub enum TokenizerResult {
Done,
Break,
}
fn option_push(opt_str: &mut Option<String>, c: char) {
match *opt_str {
Some(ref mut s) => s.push(c),
None => *opt_str = Some(String::from(c)),
}
}
/// Tokenizer options, with an impl for `Default`.
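///
/// A minimal sketch of overriding a single option while keeping the
/// defaults for the rest:
///
/// ```ignore
/// let opts = TokenizerOpts {
///     exact_errors: true,
///     ..TokenizerOpts::default()
/// };
/// ```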
#[derive(Clone)]
pub struct TokenizerOpts {
/// Report all parse errors described in the spec, at some
/// performance penalty? Defaults to false, except when the
/// `spans` feature is enabled in which case it defaults to true.
pub exact_errors: bool,
/// Discard a `U+FEFF BYTE ORDER MARK` if we see one at the beginning
/// of the stream? Default: true
pub discard_bom: bool,
/// Keep a record of how long we spent in each state? Printed
/// when `end()` is called. Default: false
pub profile: bool,
/// Initial state override. Only the test runner should use
/// a non-`None` value!
pub initial_state: Option<states::State>,
/// Last start tag. Only the test runner should use a
/// non-`None` value!
pub last_start_tag_name: Option<String>,
}
impl Default for TokenizerOpts {
fn default() -> TokenizerOpts {
TokenizerOpts {
exact_errors: cfg!(feature = "spans"),
discard_bom: true,
profile: false,
initial_state: None,
last_start_tag_name: None,
}
}
}
/// The HTML tokenizer.
pub struct Tokenizer<Sink> {
/// Options controlling the behavior of the tokenizer.
opts: TokenizerOpts,
/// Destination for tokens we emit.
pub sink: Sink,
/// The abstract machine state as described in the spec.
state: states::State,
/// Are we at the end of the file, once buffers have been processed
/// completely? This affects whether we will wait for lookahead or not.
at_eof: bool,
/// Tokenizer for character references, if we're tokenizing
/// one at the moment.
char_ref_tokenizer: Option<Box<CharRefTokenizer>>,
/// Current input character. Just consumed, may reconsume.
current_char: char,
/// Should we reconsume the current input character?
reconsume: bool,
/// Did we just consume \r, translating it to \n? In that case we need
/// to ignore the next character if it's \n.
ignore_lf: bool,
/// Discard a U+FEFF BYTE ORDER MARK if we see one? Only done at the
/// beginning of the stream.
discard_bom: bool,
/// Current tag kind.
current_tag_kind: TagKind,
/// Current tag name.
current_tag_name: String,
/// Current tag is self-closing?
current_tag_self_closing: bool,
/// Current tag attributes.
current_tag_attrs: Vec<Attribute>,
/// Current attribute name.
current_attr_name: String,
/// Current attribute value.
current_attr_value: String,
/// Current comment.
current_comment: String,
/// Current doctype token.
current_doctype: Doctype,
/// Last start tag name, for use in checking "appropriate end tag".
last_start_tag_name: Option<String>,
/// The "temporary buffer" mentioned in the spec.
temp_buf: String,
/// Record of how many ns we spent in each state, if profiling is enabled.
state_profile: BTreeMap<states::State, u64>,
/// Record of how many ns we spent in the token sink.
time_in_sink: u64,
/// Track current line
current_line: u64,
#[cfg(feature = "spans")]
spans: Spans,
}
#[cfg(feature = "spans")]
#[derive(Default)]
struct Spans {
/// Track current byte position
current_pos: usize,
/// Current tag name span.
current_tag_name: core::ops::Range<usize>,
/// Current attribute name span.
current_attr_name: core::ops::Range<usize>,
/// Current attribute value span.
current_attr_value: core::ops::Range<usize>,
}
#[cfg(feature = "spans")]
impl Spans {
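// `current_pos` has already advanced past the one-byte character that
// terminated the name, so the span ends one byte earlier.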
fn end_tag_name(&mut self) {
self.current_tag_name.end = self.current_pos - 1;
}
fn end_attr_name(&mut self) {
self.current_attr_name.end = self.current_pos - 1;
}
}
impl<Sink: TokenSink> Tokenizer<Sink> {
/// Create a new tokenizer which feeds tokens to a particular `TokenSink`.
pub fn new(sink: Sink, mut opts: TokenizerOpts) -> Tokenizer<Sink> {
let start_tag_name = opts.last_start_tag_name.take();
let state = opts.initial_state.unwrap_or(states::Data);
let discard_bom = opts.discard_bom;
Tokenizer {
opts,
sink,
state,
char_ref_tokenizer: None,
at_eof: false,
current_char: '\0',
reconsume: false,
ignore_lf: false,
discard_bom,
current_tag_kind: StartTag,
current_tag_name: String::new(),
current_tag_self_closing: false,
current_tag_attrs: vec![],
current_attr_name: String::new(),
current_attr_value: String::new(),
current_comment: String::new(),
current_doctype: Doctype::new(),
last_start_tag_name: start_tag_name,
temp_buf: String::new(),
state_profile: BTreeMap::new(),
time_in_sink: 0,
current_line: 1,
#[cfg(feature = "spans")]
spans: Spans::default(),
}
}
/// Feed an input string into the tokenizer.
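///
/// Returns `TokenizerResult::Break` if the sink asked the tokenizer to
/// stop; calling `feed` again with the same queue resumes where it left
/// off. A minimal sketch, assuming some `TokenSink` implementation `sink`
/// is in scope:
///
/// ```ignore
/// let mut tok = Tokenizer::new(sink, TokenizerOpts::default());
/// let mut input = BufferQueue::new();
/// input.push_back(String::from("<p>hello</p>"));
/// while tok.feed(&mut input) == TokenizerResult::Break {}
/// tok.end();
/// ```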
pub fn feed(&mut self, input: &mut BufferQueue) -> TokenizerResult {
if input.is_empty() {
return TokenizerResult::Done;
}
if self.discard_bom {
if let Some(c) = input.peek() {
if c == '\u{feff}' {
input.next();
}
} else {
return TokenizerResult::Done;
}
};
self.run(input)
}
fn process_token(&mut self, token: Token) -> TokenSinkResult {
if self.opts.profile {
let (ret, dt) = time!(self.sink.process_token(token, self.current_line));
self.time_in_sink += dt;
ret
} else {
self.sink.process_token(token, self.current_line)
}
}
fn process_token_and_continue(&mut self, token: Token) {
assert!(matches!(
self.process_token(token),
TokenSinkResult::Continue
));
}
//§ preprocessing-the-input-stream
// Get the next input character, which might be the character
// 'c' that we already consumed from the buffers.
fn get_preprocessed_char(&mut self, mut c: char, input: &mut BufferQueue) -> Option<char> {
if self.ignore_lf {
self.ignore_lf = false;
if c == '\n' {
c = input.next()?;
}
}
if c == '\r' {
self.ignore_lf = true;
c = '\n';
}
if c == '\n' {
self.current_line += 1;
}
if self.opts.exact_errors
&& match c as u32 {
0x01..=0x08 | 0x0B | 0x0E..=0x1F | 0x7F..=0x9F | 0xFDD0..=0xFDEF => true,
n if (n & 0xFFFE) == 0xFFFE => true,
_ => false,
}
{
self.emit_error(Error::BadCharacter(c));
}
#[cfg(feature = "spans")]
{
self.spans.current_pos += c.len_utf8();
}
self.current_char = c;
Some(c)
}
//§ tokenization
// Get the next input character, if one is available.
fn get_char(&mut self, input: &mut BufferQueue) -> Option<char> {
if self.reconsume {
self.reconsume = false;
Some(self.current_char)
} else {
input
.next()
.and_then(|c| self.get_preprocessed_char(c, input))
}
}
fn pop_except_from(&mut self, input: &mut BufferQueue, set: SmallCharSet) -> Option<SetResult> {
// Bail to the slow path for various corner cases.
// This means that `FromSet` can contain characters not in the set!
// It shouldn't matter because the fallback `FromSet` case should
// always do the same thing as the `NotFromSet` case.
if self.opts.exact_errors || self.reconsume || self.ignore_lf {
return self.get_char(input).map(FromSet);
}
let d = input.pop_except_from(set);
match d {
Some(FromSet(c)) => self.get_preprocessed_char(c, input).map(FromSet),
// NB: We don't set self.current_char for a run of characters not
// in the set. It shouldn't matter for the codepaths that use
// this.
_ => d,
}
}
// Check if the next characters are an ASCII case-insensitive match. See
// BufferQueue::eat.
//
// NB: this doesn't do input stream preprocessing or set the current input
// character.
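// With `eq = u8::eq_ignore_ascii_case` (as in the `eat!` macro below), the
// pattern "doctype" matches `DOCTYPE`, `doctype`, `DocType`, and so on.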
fn eat(
&mut self,
input: &mut BufferQueue,
pat: &str,
eq: fn(&u8, &u8) -> bool,
) -> Option<bool> {
input.push_front(replace(&mut self.temp_buf, String::new()));
match input.eat(pat, eq) {
None if self.at_eof => Some(false),
None => {
while let Some(c) = input.next() {
self.temp_buf.push(c);
}
None
}
Some(matched) => Some(matched),
}
}
/// Run the state machine for as long as we can.
fn run(&mut self, input: &mut BufferQueue) -> TokenizerResult {
if self.opts.profile {
loop {
let state = self.state;
let old_sink = self.time_in_sink;
let (run, mut dt) = time!(self.step(input));
dt -= self.time_in_sink - old_sink;
let new = match self.state_profile.get_mut(&state) {
Some(x) => {
*x += dt;
false
}
None => true,
};
if new {
// do this here because of borrow shenanigans
self.state_profile.insert(state, dt);
}
match run {
ControlFlow::Continue(()) => (),
ControlFlow::Break(ProcessResult::Suspend) => break,
ControlFlow::Break(ProcessResult::Break) => return TokenizerResult::Break,
}
}
} else {
loop {
match self.step(input) {
ControlFlow::Continue(()) => (),
ControlFlow::Break(ProcessResult::Suspend) => break,
ControlFlow::Break(ProcessResult::Break) => return TokenizerResult::Break,
}
}
}
TokenizerResult::Done
}
fn bad_char_error(&mut self) {
self.emit_error(Error::UnexpectedCharacter(
self.current_char,
InternalState(self.state),
));
}
fn bad_eof_error(&mut self) {
self.emit_error(Error::UnexpectedEOF(InternalState(self.state)));
}
fn emit_char(&mut self, c: char) {
self.process_token_and_continue(match c {
'\0' => NullCharacterToken,
_ => CharacterTokens(String::from(c)),
});
}
// The string must not contain '\0'!
fn emit_chars(&mut self, b: String) {
self.process_token_and_continue(CharacterTokens(b));
}
fn emit_current_tag(&mut self) -> ControlFlow<ProcessResult> {
self.finish_attribute();
let name = self.current_tag_name.clone();
self.current_tag_name.clear();
match self.current_tag_kind {
StartTag => {
self.last_start_tag_name = Some(name.clone());
}
EndTag => {
if !self.current_tag_attrs.is_empty() {
self.emit_error(Error::AttributesOnEndTag);
}
if self.current_tag_self_closing {
self.emit_error(Error::SelfClosingEndTag);
}
}
}
// https://html.spec.whatwg.org/multipage/#concept-frag-parse-context
let next_state = match name.as_str() {
"title" | "textarea" => states::RawData(RawKind::Rcdata),
"style" | "xmp" | "iframe" | "noembed" | "noframes" => {
states::RawData(RawKind::Rawtext)
}
"script" => states::RawData(RawKind::ScriptData),
"plaintext" => states::Plaintext,
_other => states::Data,
};
let token = TagToken(Tag {
kind: self.current_tag_kind,
name,
self_closing: self.current_tag_self_closing,
attrs: replace(&mut self.current_tag_attrs, vec![]),
#[cfg(feature = "spans")]
name_span: self.spans.current_tag_name.clone(),
});
match self.process_token(token) {
TokenSinkResult::Continue => {
self.state = next_state;
ControlFlow::Continue(())
}
TokenSinkResult::Plaintext => {
self.state = states::Plaintext;
ControlFlow::Continue(())
}
TokenSinkResult::Break => {
self.state = states::Data;
ControlFlow::Break(ProcessResult::Break)
}
TokenSinkResult::RawData(kind) => {
self.state = states::RawData(kind);
ControlFlow::Continue(())
}
}
}
fn emit_temp_buf(&mut self) {
// FIXME: Make sure that clearing on emit is spec-compatible.
let buf = replace(&mut self.temp_buf, String::new());
self.emit_chars(buf);
}
fn clear_temp_buf(&mut self) {
// Do this without a new allocation.
self.temp_buf.clear();
}
fn emit_current_comment(&mut self) {
let comment = replace(&mut self.current_comment, String::new());
self.process_token_and_continue(CommentToken(comment));
}
fn discard_tag(&mut self) {
self.current_tag_name.clear();
self.current_tag_self_closing = false;
self.current_tag_attrs = vec![];
}
fn create_tag(&mut self, kind: TagKind, c: char) {
self.discard_tag();
self.current_tag_name.push(c);
self.current_tag_kind = kind;
}
fn have_appropriate_end_tag(&self) -> bool {
match self.last_start_tag_name.as_ref() {
Some(last) => (self.current_tag_kind == EndTag) && (*self.current_tag_name == **last),
None => false,
}
}
fn create_attribute(&mut self, c: char) {
self.finish_attribute();
self.current_attr_name.push(c);
}
fn finish_attribute(&mut self) {
if self.current_attr_name.is_empty() {
return;
}
// Check for a duplicate attribute.
// FIXME: the spec says we should error as soon as the name is finished.
// FIXME: linear time search, do we care?
let dup = {
let name = &*self.current_attr_name;
self.current_tag_attrs.iter().any(|a| &*a.name == name)
};
if dup {
self.emit_error(Error::DuplicateAttribute {
#[cfg(feature = "spans")]
span: self.spans.current_attr_name.clone(),
});
self.current_attr_name.clear();
self.current_attr_value.clear();
} else {
let name = self.current_attr_name.clone();
self.current_attr_name.clear();
self.current_tag_attrs.push(Attribute {
name,
value: replace(&mut self.current_attr_value, String::new()),
#[cfg(feature = "spans")]
name_span: self.spans.current_attr_name.clone(),
#[cfg(feature = "spans")]
value_span: self.spans.current_attr_value.clone(),
});
}
}
fn emit_current_doctype(&mut self) {
let doctype = replace(&mut self.current_doctype, Doctype::new());
self.process_token_and_continue(DoctypeToken(doctype));
}
fn doctype_id(&mut self, kind: DoctypeIdKind) -> &mut Option<String> {
match kind {
Public => &mut self.current_doctype.public_id,
System => &mut self.current_doctype.system_id,
}
}
fn clear_doctype_id(&mut self, kind: DoctypeIdKind) {
let id = self.doctype_id(kind);
match *id {
Some(ref mut s) => s.clear(),
None => *id = Some(String::new()),
}
}
fn consume_char_ref(&mut self, addnl_allowed: Option<char>) {
// NB: The char ref tokenizer assumes we have an additional allowed
// character iff we're tokenizing in an attribute value.
self.char_ref_tokenizer = Some(Box::new(CharRefTokenizer::new(addnl_allowed)));
}
fn emit_eof(&mut self) {
self.process_token_and_continue(EOFToken);
}
fn peek(&mut self, input: &BufferQueue) -> Option<char> {
if self.reconsume {
Some(self.current_char)
} else {
input.peek()
}
}
fn discard_char(&mut self, input: &mut BufferQueue) {
self.get_char(input);
}
fn emit_error(&mut self, error: Error) {
self.process_token_and_continue(ParseError {
error,
#[cfg(feature = "spans")]
span: self.spans.current_pos - 1..self.spans.current_pos - 1,
});
}
}
//§ END
// Shorthand for common state machine behaviors.
macro_rules! shorthand (
( $me:ident : emit $c:expr ) => ( $me.emit_char($c) );
( $me:ident : create_tag $kind:ident $c:expr ) => ( $me.create_tag($kind, $c) );
( $me:ident : push_tag $c:expr ) => ( $me.current_tag_name.push($c) );
( $me:ident : discard_tag ) => ( $me.discard_tag() );
( $me:ident : discard_char $input:expr ) => ( $me.discard_char($input) );
( $me:ident : push_temp $c:expr ) => ( $me.temp_buf.push($c) );
( $me:ident : emit_temp ) => ( $me.emit_temp_buf() );
( $me:ident : clear_temp ) => ( $me.clear_temp_buf() );
( $me:ident : create_attr $c:expr ) => ( $me.create_attribute($c) );
( $me:ident : push_name $c:expr ) => ( $me.current_attr_name.push($c) );
( $me:ident : push_value $c:expr ) => ( $me.current_attr_value.push($c) );
( $me:ident : append_value $c:expr ) => ( $me.current_attr_value.push_str($c) );
( $me:ident : push_comment $c:expr ) => ( $me.current_comment.push($c) );
( $me:ident : append_comment $c:expr ) => ( $me.current_comment.push_str($c) );
( $me:ident : emit_comment ) => ( $me.emit_current_comment() );
( $me:ident : clear_comment ) => ( $me.current_comment.clear() );
( $me:ident : create_doctype ) => ( $me.current_doctype = Doctype::new() );
( $me:ident : push_doctype_name $c:expr ) => ( option_push(&mut $me.current_doctype.name, $c) );
( $me:ident : push_doctype_id $k:ident $c:expr ) => ( option_push($me.doctype_id($k), $c) );
( $me:ident : clear_doctype_id $k:ident ) => ( $me.clear_doctype_id($k) );
( $me:ident : force_quirks ) => ( $me.current_doctype.force_quirks = true );
( $me:ident : emit_doctype ) => ( $me.emit_current_doctype() );
( $me:ident : error ) => ( $me.bad_char_error() );
( $me:ident : error_eof ) => ( $me.bad_eof_error() );
);
// Tracing of tokenizer actions. This adds significant bloat and compile time,
// so it's behind a cfg flag.
#[cfg(trace_tokenizer)]
macro_rules! sh_trace ( ( $me:ident : $($cmds:tt)* ) => ({
println!("  {}", stringify!($($cmds)*));
shorthand!($me: $($cmds)*);
}));
#[cfg(not(trace_tokenizer))]
macro_rules! sh_trace ( ( $me:ident : $($cmds:tt)* ) => ( shorthand!($me: $($cmds)*) ) );
// A little DSL for sequencing shorthand actions.
macro_rules! go (
( $me:ident : to $s:ident ) => ({ $me.state = states::$s; ControlFlow::Continue(()) });
( $me:ident : to $s:ident $k1:expr ) => ({ $me.state = states::$s($k1); ControlFlow::Continue(()) });
( $me:ident : to $s:ident $k1:ident $k2:expr ) => ({ $me.state = states::$s($k1($k2)); ControlFlow::Continue(()) });
( $me:ident : consume_char_ref ) => ({ $me.consume_char_ref(None); ControlFlow::Continue(()) });
( $me:ident : consume_char_ref $addnl:expr ) => ({ $me.consume_char_ref(Some($addnl)); ControlFlow::Continue(()) });
// We have a default next state after emitting a tag, but the sink can override.
( $me:ident : emit_tag $s:ident ) => ({
$me.state = states::$s;
$me.emit_current_tag()
});
// If nothing else matched, it's a single command
( $me:ident : $($cmd:tt)+ ) => ( sh_trace!($me: $($cmd)+) );
);
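// For example, `go!(self: to RawData Rcdata)` expands to
// `{ self.state = states::RawData(Rcdata); ControlFlow::Continue(()) }`,
// while a bare command such as `go!(self: emit '<')` falls through to
// `sh_trace!` and ultimately `self.emit_char('<')`.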
macro_rules! go_match ( ( $me:ident : $x:expr, $($pats:pat),+ => $($cmds:tt)* ) => (
match $x {
$($pats)|+ => go!($me: $($cmds)*),
_ => (),
}
));
// This is a macro because it can cause early return
// from the function where it is used.
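// `Continue(c)` yields `c` through the `?` operator, while `Break(Suspend)`
// propagates out of the calling function via `ControlFlow`'s `Try` impl.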
macro_rules! get_char ( ($me:expr, $input:expr) => (
match $me.get_char($input) {
Some(char) => ControlFlow::Continue(char),
None => ControlFlow::Break(ProcessResult::Suspend)
}
));
macro_rules! peek ( ($me:expr, $input:expr) => (
match $me.peek($input) {
Some(char) => ControlFlow::Continue(char),
None => ControlFlow::Break(ProcessResult::Suspend)
}
));
macro_rules! pop_except_from ( ($me:expr, $input:expr, $set:expr) => (
match $me.pop_except_from($input, $set) {
Some(char) => ControlFlow::Continue(char),
None => ControlFlow::Break(ProcessResult::Suspend)
}
));
macro_rules! eat ( ($me:expr, $input:expr, $pat:expr) => (
match $me.eat($input, $pat, u8::eq_ignore_ascii_case) {
Some(matched) => ControlFlow::Continue(matched),
None => ControlFlow::Break(ProcessResult::Suspend)
}
));
macro_rules! eat_exact ( ($me:expr, $input:expr, $pat:expr) => (
match $me.eat($input, $pat, u8::eq) {
Some(matched) => ControlFlow::Continue(matched),
None => ControlFlow::Break(ProcessResult::Suspend)
}
));
impl<Sink: TokenSink> Tokenizer<Sink> {
// Run one step of the state machine.
// Returns `ControlFlow::Continue(())` if we should be immediately
// re-invoked (this simplifies control flow vs. break / continue).
#[allow(clippy::never_loop)]
fn step(&mut self, input: &mut BufferQueue) -> ControlFlow<ProcessResult> {
if self.char_ref_tokenizer.is_some() {
return self.step_char_ref_tokenizer(input);
}
match self.state {
//§ data-state
states::Data => loop {
match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n'))? {
FromSet('\0') => {
go!(self: error);
go!(self: emit '\0');
}
FromSet('&') => {
return go!(self: consume_char_ref);
}
FromSet('<') => {
return go!(self: to TagOpen);
}
FromSet(c) => {
go!(self: emit c);
}
NotFromSet(b) => self.emit_chars(b),
}
},
//§ rcdata-state
states::RawData(Rcdata) => loop {
match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n'))? {
FromSet('\0') => {
go!(self: error);
go!(self: emit '\u{fffd}');
}
FromSet('&') => {
return go!(self: consume_char_ref);
}
FromSet('<') => {
return go!(self: to RawLessThanSign Rcdata);
}
FromSet(c) => {
go!(self: emit c);
}
NotFromSet(b) => self.emit_chars(b),
}
},
//§ rawtext-state
states::RawData(Rawtext) => loop {
match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n'))? {
FromSet('\0') => {
go!(self: error);
go!(self: emit '\u{fffd}');
}
FromSet('<') => {
return go!(self: to RawLessThanSign Rawtext);
}
FromSet(c) => {
go!(self: emit c);
}
NotFromSet(b) => self.emit_chars(b),
}
},
//§ script-data-state
states::RawData(ScriptData) => loop {
match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n'))? {
FromSet('\0') => {
go!(self: error);
go!(self: emit '\u{fffd}');
}
FromSet('<') => {
return go!(self: to RawLessThanSign ScriptData);
}
FromSet(c) => {
go!(self: emit c);
}
NotFromSet(b) => self.emit_chars(b),
}
},
//§ script-data-escaped-state
states::RawData(ScriptDataEscaped(Escaped)) => loop {
match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n'))? {
FromSet('\0') => {
go!(self: error);
go!(self: emit '\u{fffd}');
}
FromSet('-') => {
go!(self: emit '-');
return go!(self: to ScriptDataEscapedDash Escaped);
}
FromSet('<') => {
return go!(self: to RawLessThanSign ScriptDataEscaped Escaped);
}
FromSet(c) => {
go!(self: emit c);
}
NotFromSet(b) => self.emit_chars(b),
}
},
//§ script-data-double-escaped-state
states::RawData(ScriptDataEscaped(DoubleEscaped)) => loop {
match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n'))? {
FromSet('\0') => {
go!(self: error);
go!(self: emit '\u{fffd}');
}
FromSet('-') => {
go!(self: emit '-');
return go!(self: to ScriptDataEscapedDash DoubleEscaped);
}
FromSet('<') => {
go!(self: emit '<');
return go!(self: to RawLessThanSign ScriptDataEscaped DoubleEscaped);
}
FromSet(c) => {
go!(self: emit c);
}
NotFromSet(b) => self.emit_chars(b),
}
},
//§ plaintext-state
states::Plaintext => loop {
match pop_except_from!(self, input, small_char_set!('\r' '\0' '\n'))? {
FromSet('\0') => {
go!(self: error);
go!(self: emit '\u{fffd}');
}
FromSet(c) => {
go!(self: emit c);
}
NotFromSet(b) => self.emit_chars(b),
}
},
//§ tag-open-state
states::TagOpen => loop {
match get_char!(self, input)? {
'!' => {
go!(self: clear_temp);
return go!(self: to MarkupDeclarationOpen);
}
'/' => {
return go!(self: to EndTagOpen);
}
'?' => {
go!(self: error);
go!(self: clear_comment);
go!(self: push_comment '?');
return go!(self: to BogusComment);
}
c => match lower_ascii_letter(c) {
Some(cl) => {
#[cfg(feature = "spans")]
{
self.spans.current_tag_name.start = self.spans.current_pos - 1;
}
go!(self: create_tag StartTag cl);
return go!(self: to TagName);
}
None => {
go!(self: error);
go!(self: emit '<');
self.reconsume = true;
return go!(self: to Data);
}
},
}
},
//§ end-tag-open-state
states::EndTagOpen => loop {
match get_char!(self, input)? {
'>' => {
go!(self: error);
return go!(self: to Data);
}
'\0' => {
go!(self: error);
go!(self: clear_comment);
go!(self: push_comment '\u{fffd}');
return go!(self: to BogusComment);
}
c => match lower_ascii_letter(c) {
Some(cl) => {
#[cfg(feature = "spans")]
{
self.spans.current_tag_name.start = self.spans.current_pos - 1;
}
go!(self: create_tag EndTag cl);
return go!(self: to TagName);
}
None => {
go!(self: error);
go!(self: clear_comment);
go!(self: push_comment c);
return go!(self: to BogusComment);
}
},
}
},
//§ tag-name-state
states::TagName => loop {
match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => {
#[cfg(feature = "spans")]
self.spans.end_tag_name();
return go!(self: to BeforeAttributeName);
}
'/' => {
#[cfg(feature = "spans")]
self.spans.end_tag_name();
return go!(self: to SelfClosingStartTag);
}
'>' => {
#[cfg(feature = "spans")]
self.spans.end_tag_name();
return go!(self: emit_tag Data);
}
'\0' => {
go!(self: error);
go!(self: push_tag '\u{fffd}');
}
c => {
go!(self: push_tag (c.to_ascii_lowercase()));
}
}
},
//§ script-data-escaped-less-than-sign-state
states::RawLessThanSign(ScriptDataEscaped(Escaped)) => loop {
match get_char!(self, input)? {
'/' => {
go!(self: clear_temp);
return go!(self: to RawEndTagOpen ScriptDataEscaped Escaped);
}
c => match lower_ascii_letter(c) {
Some(cl) => {
go!(self: clear_temp);
go!(self: push_temp cl);
go!(self: emit '<');
go!(self: emit c);
return go!(self: to ScriptDataEscapeStart DoubleEscaped);
}
None => {
go!(self: emit '<');
self.reconsume = true;
return go!(self: to RawData ScriptDataEscaped Escaped);
}
},
}
},
//§ script-data-double-escaped-less-than-sign-state
states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => loop {
match get_char!(self, input)? {
'/' => {
go!(self: clear_temp);
go!(self: emit '/');
return go!(self: to ScriptDataDoubleEscapeEnd);
}
_ => {
self.reconsume = true;
return go!(self: to RawData ScriptDataEscaped DoubleEscaped);
}
}
},
//§ rcdata-less-than-sign-state rawtext-less-than-sign-state script-data-less-than-sign-state
// otherwise
states::RawLessThanSign(kind) => loop {
match get_char!(self, input)? {
'/' => {
go!(self: clear_temp);
return go!(self: to RawEndTagOpen kind);
}
'!' if kind == ScriptData => {
go!(self: emit '<');
go!(self: emit '!');
return go!(self: to ScriptDataEscapeStart Escaped);
}
_ => {
go!(self: emit '<');
self.reconsume = true;
return go!(self: to RawData kind);
}
}
},
//§ rcdata-end-tag-open-state rawtext-end-tag-open-state script-data-end-tag-open-state script-data-escaped-end-tag-open-state
states::RawEndTagOpen(kind) => loop {
let c = get_char!(self, input)?;
match lower_ascii_letter(c) {
Some(cl) => {
go!(self: create_tag EndTag cl);
go!(self: push_temp c);
return go!(self: to RawEndTagName kind);
}
None => {
go!(self: emit '<');
go!(self: emit '/');
self.reconsume = true;
return go!(self: to RawData kind);
}
}
},
//§ rcdata-end-tag-name-state rawtext-end-tag-name-state script-data-end-tag-name-state script-data-escaped-end-tag-name-state
states::RawEndTagName(kind) => loop {
let c = get_char!(self, input)?;
if self.have_appropriate_end_tag() {
match c {
'\t' | '\n' | '\x0C' | ' ' => {
return go!(self: to BeforeAttributeName);
}
'/' => {
return go!(self: to SelfClosingStartTag);
}
'>' => {
return go!(self: emit_tag Data);
}
_ => (),
}
}
match lower_ascii_letter(c) {
Some(cl) => {
go!(self: push_tag cl);
go!(self: push_temp c);
}
None => {
go!(self: discard_tag);
go!(self: emit '<');
go!(self: emit '/');
go!(self: emit_temp);
self.reconsume = true;
return go!(self: to RawData kind);
}
}
},
//§ script-data-double-escape-start-state
states::ScriptDataEscapeStart(DoubleEscaped) => loop {
let c = get_char!(self, input)?;
match c {
'\t' | '\n' | '\x0C' | ' ' | '/' | '>' => {
let esc = if &*self.temp_buf == "script" {
DoubleEscaped
} else {
Escaped
};
go!(self: emit c);
return go!(self: to RawData ScriptDataEscaped esc);
}
_ => match lower_ascii_letter(c) {
Some(cl) => {
go!(self: push_temp cl);
go!(self: emit c);
}
None => {
self.reconsume = true;
return go!(self: to RawData ScriptDataEscaped Escaped);
}
},
}
},
//§ script-data-escape-start-state
states::ScriptDataEscapeStart(Escaped) => loop {
match get_char!(self, input)? {
'-' => {
go!(self: emit '-');
return go!(self: to ScriptDataEscapeStartDash);
}
_ => {
self.reconsume = true;
return go!(self: to RawData ScriptData);
}
}
},
//§ script-data-escape-start-dash-state
states::ScriptDataEscapeStartDash => loop {
match get_char!(self, input)? {
'-' => {
go!(self: emit '-');
return go!(self: to ScriptDataEscapedDashDash Escaped);
}
_ => {
self.reconsume = true;
return go!(self: to RawData ScriptData);
}
}
},
//§ script-data-escaped-dash-state script-data-double-escaped-dash-state
states::ScriptDataEscapedDash(kind) => loop {
match get_char!(self, input)? {
'-' => {
go!(self: emit '-');
return go!(self: to ScriptDataEscapedDashDash kind);
}
'<' => {
if kind == DoubleEscaped {
go!(self: emit '<');
}
return go!(self: to RawLessThanSign ScriptDataEscaped kind);
}
'\0' => {
go!(self: error);
go!(self: emit '\u{fffd}');
return go!(self: to RawData ScriptDataEscaped kind);
}
c => {
go!(self: emit c);
return go!(self: to RawData ScriptDataEscaped kind);
}
}
},
//§ script-data-escaped-dash-dash-state script-data-double-escaped-dash-dash-state
states::ScriptDataEscapedDashDash(kind) => loop {
match get_char!(self, input)? {
'-' => {
go!(self: emit '-');
}
'<' => {
if kind == DoubleEscaped {
go!(self: emit '<');
}
return go!(self: to RawLessThanSign ScriptDataEscaped kind);
}
'>' => {
go!(self: emit '>');
return go!(self: to RawData ScriptData);
}
'\0' => {
go!(self: error);
go!(self: emit '\u{fffd}');
return go!(self: to RawData ScriptDataEscaped kind);
}
c => {
go!(self: emit c);
return go!(self: to RawData ScriptDataEscaped kind);
}
}
},
//§ script-data-double-escape-end-state
states::ScriptDataDoubleEscapeEnd => loop {
let c = get_char!(self, input)?;
match c {
'\t' | '\n' | '\x0C' | ' ' | '/' | '>' => {
let esc = if &*self.temp_buf == "script" {
Escaped
} else {
DoubleEscaped
};
go!(self: emit c);
return go!(self: to RawData ScriptDataEscaped esc);
}
_ => match lower_ascii_letter(c) {
Some(cl) => {
go!(self: push_temp cl);
go!(self: emit c);
}
None => {
self.reconsume = true;
return go!(self: to RawData ScriptDataEscaped DoubleEscaped);
}
},
}
},
//§ before-attribute-name-state
states::BeforeAttributeName => loop {
match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => (),
'/' => {
return go!(self: to SelfClosingStartTag);
}
'>' => {
return go!(self: emit_tag Data);
}
'\0' => {
go!(self: error);
go!(self: create_attr '\u{fffd}');
return go!(self: to AttributeName);
}
c => match lower_ascii_letter(c) {
Some(cl) => {
go!(self: create_attr cl);
#[cfg(feature = "spans")]
{
self.spans.current_attr_name.start = self.spans.current_pos - 1;
}
return go!(self: to AttributeName);
}
None => {
go_match!(self: c,
'"' , '\'' , '<' , '=' => error);
go!(self: create_attr c);
return go!(self: to AttributeName);
}
},
}
},
//§ attribute-name-state
states::AttributeName => loop {
match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => {
#[cfg(feature = "spans")]
self.spans.end_attr_name();
return go!(self: to AfterAttributeName);
}
'/' => {
#[cfg(feature = "spans")]
self.spans.end_attr_name();
return go!(self: to SelfClosingStartTag);
}
'=' => {
#[cfg(feature = "spans")]
self.spans.end_attr_name();
return go!(self: to BeforeAttributeValue);
}
'>' => {
#[cfg(feature = "spans")]
self.spans.end_attr_name();
return go!(self: emit_tag Data);
}
'\0' => {
go!(self: error);
go!(self: push_name '\u{fffd}');
}
c => match lower_ascii_letter(c) {
Some(cl) => {
go!(self: push_name cl);
}
None => {
go_match!(self: c,
'"' , '\'' , '<' => error);
go!(self: push_name c);
}
},
}
},
//§ after-attribute-name-state
states::AfterAttributeName => loop {
match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => (),
'/' => {
return go!(self: to SelfClosingStartTag);
}
'=' => {
return go!(self: to BeforeAttributeValue);
}
'>' => {
return go!(self: emit_tag Data);
}
'\0' => {
go!(self: error);
go!(self: create_attr '\u{fffd}');
return go!(self: to AttributeName);
}
c => match lower_ascii_letter(c) {
Some(cl) => {
go!(self: create_attr cl);
return go!(self: to AttributeName);
}
None => {
go_match!(self: c,
'"' , '\'' , '<' => error);
go!(self: create_attr c);
return go!(self: to AttributeName);
}
},
}
},
//§ before-attribute-value-state
// Use peek so we can handle the first attr character along with the rest,
// hopefully in the same zero-copy buffer.
states::BeforeAttributeValue => loop {
match peek!(self, input)? {
'\t' | '\n' | '\r' | '\x0C' | ' ' => {
go!(self: discard_char input);
}
'"' => {
go!(self: discard_char input);
return go!(self: to AttributeValue DoubleQuoted);
}
'\'' => {
go!(self: discard_char input);
return go!(self: to AttributeValue SingleQuoted);
}
'\0' => {
go!(self: discard_char input);
go!(self: error);
go!(self: push_value '\u{fffd}');
return go!(self: to AttributeValue Unquoted);
}
'>' => {
go!(self: discard_char input);
go!(self: error);
return go!(self: emit_tag Data);
}
_ => {
return go!(self: to AttributeValue Unquoted);
}
}
},
//§ attribute-value-(double-quoted)-state
states::AttributeValue(DoubleQuoted) => {
#[cfg(feature = "spans")]
{
self.spans.current_attr_value.start = self.spans.current_pos;
}
loop {
match pop_except_from!(self, input, small_char_set!('\r' '"' '&' '\0' '\n'))? {
FromSet('"') => {
return go!(self: to AfterAttributeValueQuoted);
}
FromSet('&') => {
return go!(self: consume_char_ref '"');
}
FromSet('\0') => {
go!(self: error);
go!(self: push_value '\u{fffd}');
}
FromSet(c) => {
go!(self: push_value c);
}
NotFromSet(ref b) => {
go!(self: append_value b);
}
}
}
}
//§ attribute-value-(single-quoted)-state
states::AttributeValue(SingleQuoted) => {
#[cfg(feature = "spans")]
{
self.spans.current_attr_value.start = self.spans.current_pos;
}
loop {
match pop_except_from!(self, input, small_char_set!('\r' '\'' '&' '\0' '\n'))? {
FromSet('\'') => {
return go!(self: to AfterAttributeValueQuoted);
}
FromSet('&') => {
return go!(self: consume_char_ref '\'');
}
FromSet('\0') => {
go!(self: error);
go!(self: push_value '\u{fffd}');
}
FromSet(c) => {
go!(self: push_value c);
}
NotFromSet(ref b) => {
go!(self: append_value b);
}
}
}
}
//§ attribute-value-(unquoted)-state
states::AttributeValue(Unquoted) => {
#[cfg(feature = "spans")]
{
self.spans.current_attr_value.start = self.spans.current_pos;
}
loop {
match pop_except_from!(
self,
input,
small_char_set!('\r' '\t' '\n' '\x0C' ' ' '&' '>' '\0')
)? {
FromSet('\t') | FromSet('\n') | FromSet('\x0C') | FromSet(' ') => {
#[cfg(feature = "spans")]
{
self.spans.current_attr_value.end = self.spans.current_pos - 1;
}
return go!(self: to BeforeAttributeName);
}
FromSet('&') => {
return go!(self: consume_char_ref '>');
}
FromSet('>') => {
#[cfg(feature = "spans")]
{
self.spans.current_attr_value.end = self.spans.current_pos - 1;
}
return go!(self: emit_tag Data);
}
FromSet('\0') => {
go!(self: error);
go!(self: push_value '\u{fffd}');
}
FromSet(c) => {
go_match!(self: c,
'"' , '\'' , '<' , '=' , '`' => error);
go!(self: push_value c);
}
NotFromSet(ref b) => {
go!(self: append_value b);
}
}
}
}
//§ after-attribute-value-(quoted)-state
states::AfterAttributeValueQuoted => {
#[cfg(feature = "spans")]
{
self.spans.current_attr_value.end = self.spans.current_pos - 1;
}
loop {
match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => {
return go!(self: to BeforeAttributeName);
}
'/' => {
return go!(self: to SelfClosingStartTag);
}
'>' => {
return go!(self: emit_tag Data);
}
_ => {
go!(self: error);
self.reconsume = true;
return go!(self: to BeforeAttributeName);
}
}
}
}
//§ self-closing-start-tag-state
states::SelfClosingStartTag => loop {
match get_char!(self, input)? {
'>' => {
self.current_tag_self_closing = true;
return go!(self: emit_tag Data);
}
_ => {
go!(self: error);
self.reconsume = true;
return go!(self: to BeforeAttributeName);
}
}
},
//§ comment-start-state
states::CommentStart => loop {
match get_char!(self, input)? {
'-' => {
return go!(self: to CommentStartDash);
}
'\0' => {
go!(self: error);
go!(self: push_comment '\u{fffd}');
return go!(self: to Comment);
}
'>' => {
go!(self: error);
go!(self: emit_comment);
return go!(self: to Data);
}
c => {
go!(self: push_comment c);
return go!(self: to Comment);
}
}
},
//§ comment-start-dash-state
states::CommentStartDash => loop {
match get_char!(self, input)? {
'-' => {
return go!(self: to CommentEnd);
}
'\0' => {
go!(self: error);
go!(self: append_comment "-\u{fffd}");
return go!(self: to Comment);
}
'>' => {
go!(self: error);
go!(self: emit_comment);
return go!(self: to Data);
}
c => {
go!(self: push_comment '-');
go!(self: push_comment c);
return go!(self: to Comment);
}
}
},
//§ comment-state
states::Comment => loop {
match get_char!(self, input)? {
'-' => {
return go!(self: to CommentEndDash);
}
'\0' => {
go!(self: error);
go!(self: push_comment '\u{fffd}');
}
c => {
go!(self: push_comment c);
}
}
},
//§ comment-end-dash-state
states::CommentEndDash => loop {
match get_char!(self, input)? {
'-' => {
return go!(self: to CommentEnd);
}
'\0' => {
go!(self: error);
go!(self: append_comment "-\u{fffd}");
return go!(self: to Comment);
}
c => {
go!(self: push_comment '-');
go!(self: push_comment c);
return go!(self: to Comment);
}
}
},
//§ comment-end-state
states::CommentEnd => loop {
match get_char!(self, input)? {
'>' => {
go!(self: emit_comment);
return go!(self: to Data);
}
'\0' => {
go!(self: error);
go!(self: append_comment "--\u{fffd}");
return go!(self: to Comment);
}
'!' => {
go!(self: error);
return go!(self: to CommentEndBang);
}
'-' => {
go!(self: error);
go!(self: push_comment '-');
}
c => {
go!(self: error);
go!(self: append_comment "--");
go!(self: push_comment c);
return go!(self: to Comment);
}
}
},
//§ comment-end-bang-state
states::CommentEndBang => loop {
match get_char!(self, input)? {
'-' => {
go!(self: append_comment "--!");
return go!(self: to CommentEndDash);
}
'>' => {
go!(self: emit_comment);
return go!(self: to Data);
}
'\0' => {
go!(self: error);
go!(self: append_comment "--!\u{fffd}");
return go!(self: to Comment);
}
c => {
go!(self: append_comment "--!");
go!(self: push_comment c);
return go!(self: to Comment);
}
}
},
//§ doctype-state
states::Doctype => loop {
match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => {
return go!(self: to BeforeDoctypeName);
}
_ => {
go!(self: error);
self.reconsume = true;
return go!(self: to BeforeDoctypeName);
}
}
},
//§ before-doctype-name-state
states::BeforeDoctypeName => loop {
match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => (),
'\0' => {
go!(self: error);
go!(self: create_doctype);
go!(self: push_doctype_name '\u{fffd}');
return go!(self: to DoctypeName);
}
'>' => {
go!(self: error);
go!(self: create_doctype);
go!(self: force_quirks);
go!(self: emit_doctype);
return go!(self: to Data);
}
c => {
go!(self: create_doctype);
go!(self: push_doctype_name (c.to_ascii_lowercase()));
return go!(self: to DoctypeName);
}
}
},
//§ doctype-name-state
states::DoctypeName => loop {
match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => {
go!(self: clear_temp);
return go!(self: to AfterDoctypeName);
}
'>' => {
go!(self: emit_doctype);
return go!(self: to Data);
}
'\0' => {
go!(self: error);
go!(self: push_doctype_name '\u{fffd}');
}
c => {
go!(self: push_doctype_name (c.to_ascii_lowercase()));
}
}
},
//§ after-doctype-name-state
states::AfterDoctypeName => loop {
if eat!(self, input, "public")? {
return go!(self: to AfterDoctypeKeyword Public);
} else if eat!(self, input, "system")? {
return go!(self: to AfterDoctypeKeyword System);
} else {
match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => (),
'>' => {
go!(self: emit_doctype);
return go!(self: to Data);
}
_ => {
go!(self: error);
go!(self: force_quirks);
return go!(self: to BogusDoctype);
}
}
}
},
//§ after-doctype-public-keyword-state after-doctype-system-keyword-state
states::AfterDoctypeKeyword(kind) => loop {
match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => {
return go!(self: to BeforeDoctypeIdentifier kind);
}
'"' => {
go!(self: error);
go!(self: clear_doctype_id kind);
return go!(self: to DoctypeIdentifierDoubleQuoted kind);
}
'\'' => {
go!(self: error);
go!(self: clear_doctype_id kind);
return go!(self: to DoctypeIdentifierSingleQuoted kind);
}
'>' => {
go!(self: error);
go!(self: force_quirks);
go!(self: emit_doctype);
return go!(self: to Data);
}
_ => {
go!(self: error);
go!(self: force_quirks);
return go!(self: to BogusDoctype);
}
}
},
//§ before-doctype-public-identifier-state before-doctype-system-identifier-state
states::BeforeDoctypeIdentifier(kind) => loop {
match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => (),
'"' => {
go!(self: clear_doctype_id kind);
return go!(self: to DoctypeIdentifierDoubleQuoted kind);
}
'\'' => {
go!(self: clear_doctype_id kind);
return go!(self: to DoctypeIdentifierSingleQuoted kind);
}
'>' => {
go!(self: error);
go!(self: force_quirks);
go!(self: emit_doctype);
return go!(self: to Data);
}
_ => {
go!(self: error);
go!(self: force_quirks);
return go!(self: to BogusDoctype);
}
}
},
//§ doctype-public-identifier-(double-quoted)-state doctype-system-identifier-(double-quoted)-state
states::DoctypeIdentifierDoubleQuoted(kind) => loop {
match get_char!(self, input)? {
'"' => {
return go!(self: to AfterDoctypeIdentifier kind);
}
'\0' => {
go!(self: error);
go!(self: push_doctype_id kind '\u{fffd}');
}
'>' => {
go!(self: error);
go!(self: force_quirks);
go!(self: emit_doctype);
return go!(self: to Data);
}
c => {
go!(self: push_doctype_id kind c);
}
}
},
//§ doctype-public-identifier-(single-quoted)-state doctype-system-identifier-(single-quoted)-state
states::DoctypeIdentifierSingleQuoted(kind) => loop {
match get_char!(self, input)? {
'\'' => {
return go!(self: to AfterDoctypeIdentifier kind);
}
'\0' => {
go!(self: error);
go!(self: push_doctype_id kind '\u{fffd}');
}
'>' => {
go!(self: error);
go!(self: force_quirks);
go!(self: emit_doctype);
return go!(self: to Data);
}
c => {
go!(self: push_doctype_id kind c);
}
}
},
//§ after-doctype-public-identifier-state
states::AfterDoctypeIdentifier(Public) => loop {
match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => {
return go!(self: to BetweenDoctypePublicAndSystemIdentifiers);
}
'>' => {
go!(self: emit_doctype);
return go!(self: to Data);
}
'"' => {
go!(self: error);
go!(self: clear_doctype_id System);
return go!(self: to DoctypeIdentifierDoubleQuoted System);
}
'\'' => {
go!(self: error);
go!(self: clear_doctype_id System);
return go!(self: to DoctypeIdentifierSingleQuoted System);
}
_ => {
go!(self: error);
go!(self: force_quirks);
return go!(self: to BogusDoctype);
}
}
},
//§ after-doctype-system-identifier-state
states::AfterDoctypeIdentifier(System) => loop {
match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => (),
'>' => {
go!(self: emit_doctype);
return go!(self: to Data);
}
_ => {
go!(self: error);
return go!(self: to BogusDoctype);
}
}
},
//§ between-doctype-public-and-system-identifiers-state
states::BetweenDoctypePublicAndSystemIdentifiers => loop {
match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => (),
'>' => {
go!(self: emit_doctype);
return go!(self: to Data);
}
'"' => {
go!(self: clear_doctype_id System);
return go!(self: to DoctypeIdentifierDoubleQuoted System);
}
'\'' => {
go!(self: clear_doctype_id System);
return go!(self: to DoctypeIdentifierSingleQuoted System);
}
_ => {
go!(self: error);
go!(self: force_quirks);
return go!(self: to BogusDoctype);
}
}
},
//§ bogus-doctype-state
states::BogusDoctype => loop {
match get_char!(self, input)? {
'>' => {
go!(self: emit_doctype);
return go!(self: to Data);
}
_ => (),
}
},
//§ bogus-comment-state
states::BogusComment => loop {
match get_char!(self, input)? {
'>' => {
go!(self: emit_comment);
return go!(self: to Data);
}
'\0' => {
go!(self: push_comment '\u{fffd}');
}
c => {
go!(self: push_comment c);
}
}
},
//§ markup-declaration-open-state
states::MarkupDeclarationOpen => loop {
if eat_exact!(self, input, "--")? {
go!(self: clear_comment);
return go!(self: to CommentStart);
} else if eat!(self, input, "doctype")? {
return go!(self: to Doctype);
} else {
if self
.sink
.adjusted_current_node_present_but_not_in_html_namespace()
{
if eat_exact!(self, input, "[CDATA[")? {
go!(self: clear_temp);
return go!(self: to CdataSection);
}
}
go!(self: error);
return go!(self: to BogusComment);
}
},
//§ cdata-section-state
states::CdataSection => loop {
match get_char!(self, input)? {
']' => {
return go!(self: to CdataSectionBracket);
}
'\0' => {
go!(self: emit_temp);
go!(self: emit '\0');
}
c => {
go!(self: push_temp c);
}
}
},
//§ cdata-section-bracket
states::CdataSectionBracket => match get_char!(self, input)? {
']' => {
return go!(self: to CdataSectionEnd);
}
_ => {
go!(self: push_temp ']');
self.reconsume = true;
return go!(self: to CdataSection);
}
},
//§ cdata-section-end
states::CdataSectionEnd => loop {
match get_char!(self, input)? {
']' => {
go!(self: push_temp ']');
}
'>' => {
go!(self: emit_temp);
return go!(self: to Data);
}
_ => {
go!(self: push_temp ']');
go!(self: push_temp ']');
self.reconsume = true;
return go!(self: to CdataSection);
}
}
},
//§ END
}
}
fn step_char_ref_tokenizer(&mut self, input: &mut BufferQueue) -> ControlFlow<ProcessResult> {
// FIXME HACK: Take and replace the tokenizer so we don't
// double-mut-borrow self. This is why it's boxed.
let mut tok = self.char_ref_tokenizer.take().unwrap();
let outcome = tok.step(self, input);
let progress = match outcome {
char_ref::Done => {
self.process_char_ref(tok.get_result());
return ControlFlow::Continue(());
}
char_ref::Stuck => ControlFlow::Break(ProcessResult::Suspend),
char_ref::Progress => ControlFlow::Continue(()),
};
self.char_ref_tokenizer = Some(tok);
progress
}
fn process_char_ref(&mut self, char_ref: CharRef) {
let CharRef {
mut chars,
mut num_chars,
} = char_ref;
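// An empty result means the '&' did not begin a character reference;
// emit the ampersand itself.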
if num_chars == 0 {
chars[0] = '&';
num_chars = 1;
}
for i in 0..num_chars {
let c = chars[i as usize];
match self.state {
states::Data | states::RawData(states::Rcdata) => {
go!(self: emit c);
}
states::AttributeValue(_) => {
go!(self: push_value c);
}
_ => panic!(
"state {:?} should not be reachable in process_char_ref",
self.state
),
}
}
}
/// Indicate that we have reached the end of the input.
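/// This flushes the character-reference sub-tokenizer (if any), processes
/// any remaining buffered input, applies the EOF rules for the current
/// state, and finally calls `end()` on the sink.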
pub fn end(&mut self) {
// Handle EOF in the char ref sub-tokenizer, if there is one.
// Do this first because it might un-consume stuff.
let mut input = BufferQueue::new();
match self.char_ref_tokenizer.take() {
None => (),
Some(mut tok) => {
tok.end_of_file(self, &mut input);
self.process_char_ref(tok.get_result());
}
}
// Process all remaining buffered input.
// If we're waiting for lookahead, we're not gonna get it.
self.at_eof = true;
assert!(matches!(self.run(&mut input), TokenizerResult::Done));
assert!(input.is_empty());
loop {
match self.eof_step() {
ControlFlow::Continue(()) => (),
ControlFlow::Break(ProcessResult::Suspend) => break,
ControlFlow::Break(ProcessResult::Break) => unreachable!(),
}
}
self.sink.end();
if self.opts.profile {
self.dump_profile();
}
}
fn dump_profile(&self) {
let mut results: Vec<(states::State, u64)> =
self.state_profile.iter().map(|(s, t)| (*s, *t)).collect();
results.sort_by(|&(_, x), &(_, y)| y.cmp(&x));
let total: u64 = results
.iter()
.map(|&(_, t)| t)
.fold(0, ::std::ops::Add::add);
println!("\nTokenizer profile, in nanoseconds");
println!("\n{:12} total in token sink", self.time_in_sink);
println!("\n{:12} total in tokenizer", total);
for (k, v) in results.into_iter() {
let pct = 100.0 * (v as f64) / (total as f64);
println!("{:12} {:4.1}% {:?}", v, pct, k);
}
}
fn eof_step(&mut self) -> ControlFlow<ProcessResult> {
match self.state {
states::Data
| states::RawData(Rcdata)
| states::RawData(Rawtext)
| states::RawData(ScriptData)
| states::Plaintext => {
self.emit_eof();
ControlFlow::Break(ProcessResult::Suspend)
}
states::TagName
| states::RawData(ScriptDataEscaped(_))
| states::BeforeAttributeName
| states::AttributeName
| states::AfterAttributeName
| states::BeforeAttributeValue
| states::AttributeValue(_)
| states::AfterAttributeValueQuoted
| states::SelfClosingStartTag
| states::ScriptDataEscapedDash(_)
| states::ScriptDataEscapedDashDash(_) => {
go!(self: error_eof);
return go!(self: to Data);
}
states::TagOpen => {
go!(self: error_eof);
go!(self: emit '<');
return go!(self: to Data);
}
states::EndTagOpen => {
go!(self: error_eof);
go!(self: emit '<');
go!(self: emit '/');
return go!(self: to Data);
}
states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => {
return go!(self: to RawData ScriptDataEscaped DoubleEscaped);
}
states::RawLessThanSign(kind) => {
go!(self: emit '<');
return go!(self: to RawData kind);
}
states::RawEndTagOpen(kind) => {
go!(self: emit '<');
go!(self: emit '/');
return go!(self: to RawData kind);
}
states::RawEndTagName(kind) => {
go!(self: emit '<');
go!(self: emit '/');
go!(self: emit_temp);
return go!(self: to RawData kind);
}
states::ScriptDataEscapeStart(kind) => {
return go!(self: to RawData ScriptDataEscaped kind);
}
states::ScriptDataEscapeStartDash => {
return go!(self: to RawData ScriptData);
}
states::ScriptDataDoubleEscapeEnd => {
return go!(self: to RawData ScriptDataEscaped DoubleEscaped);
}
states::CommentStart
| states::CommentStartDash
| states::Comment
| states::CommentEndDash
| states::CommentEnd
| states::CommentEndBang => {
go!(self: error_eof);
go!(self: emit_comment);
return go!(self: to Data);
}
states::Doctype | states::BeforeDoctypeName => {
go!(self: error_eof);
go!(self: create_doctype);
go!(self: force_quirks);
go!(self: emit_doctype);
return go!(self: to Data);
}
states::DoctypeName
| states::AfterDoctypeName
| states::AfterDoctypeKeyword(_)
| states::BeforeDoctypeIdentifier(_)
| states::DoctypeIdentifierDoubleQuoted(_)
| states::DoctypeIdentifierSingleQuoted(_)
| states::AfterDoctypeIdentifier(_)
| states::BetweenDoctypePublicAndSystemIdentifiers => {
go!(self: error_eof);
go!(self: force_quirks);
go!(self: emit_doctype);
return go!(self: to Data);
}
states::BogusDoctype => {
go!(self: emit_doctype);
return go!(self: to Data);
}
states::BogusComment => {
go!(self: emit_comment);
return go!(self: to Data);
}
states::MarkupDeclarationOpen => {
go!(self: error);
return go!(self: to BogusComment);
}
states::CdataSection => {
go!(self: emit_temp);
go!(self: error_eof);
return go!(self: to Data);
}
states::CdataSectionBracket => {
go!(self: push_temp ']');
return go!(self: to CdataSection);
}
states::CdataSectionEnd => {
go!(self: push_temp ']');
go!(self: push_temp ']');
return go!(self: to CdataSection);
}
}
}
}
#[cfg(test)]
#[allow(non_snake_case)]
mod test {
use super::option_push; // private items
use super::{TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts};
use super::interface::{CharacterTokens, EOFToken, NullCharacterToken, ParseError};
use super::interface::{EndTag, StartTag, Tag};
use super::interface::{TagToken, Token};
use crate::util::buffer_queue::BufferQueue;
use std::mem::replace;
// LinesMatch implements the TokenSink trait. It is used for testing to see
// if current_line is being updated when process_token is called. The lines
// vector is a collection of the line numbers that each token is on.
struct LinesMatch {
current_str: String,
current_str_line: u64,
lines: Vec<(u64, Token)>,
}
impl LinesMatch {
fn new() -> LinesMatch {
LinesMatch {
current_str: String::new(),
current_str_line: 0,
lines: vec![],
}
}
fn push(&mut self, token: Token, line_number: u64) {
self.finish_str();
self.lines.push((line_number, token));
}
fn finish_str(&mut self) {
if !self.current_str.is_empty() {
let s = replace(&mut self.current_str, String::new());
self.push(CharacterTokens(s), self.current_str_line);
self.current_str_line = 0;
}
}
}
impl TokenSink for LinesMatch {
fn end(&mut self) {
self.finish_str();
}
fn process_token(&mut self, token: Token, line_number: u64) -> TokenSinkResult {
match token {
CharacterTokens(b) => {
self.current_str_line = line_number;
self.current_str.push_str(&b);
}
NullCharacterToken => {
self.current_str.push('\0');
}
token @ ParseError { .. } => {
self.push(token, line_number);
}
TagToken(mut t) => {
// The spec seems to indicate that one can emit
// erroneous end tags with attrs, but the test
// cases don't contain them.
match t.kind {
EndTag => {
t.self_closing = false;
t.attrs = vec![];
}
_ => t.attrs.sort_by(|a1, a2| a1.name.cmp(&a2.name)),
}
self.push(TagToken(t), line_number);
}
EOFToken => (),
_ => self.push(token, line_number),
}
TokenSinkResult::Continue
}
}
// Take in tokens, process them, and return vector with line
// numbers that each token is on
fn tokenize(input: Vec<String>, opts: TokenizerOpts) -> Vec<(u64, Token)> {
let sink = LinesMatch::new();
let mut tok = Tokenizer::new(sink, opts);
let mut buffer = BufferQueue::new();
for chunk in input.into_iter() {
buffer.push_back(chunk);
let _ = tok.feed(&mut buffer);
}
tok.end();
tok.sink.lines
}
// FUTURE: replace with std::assert_matches once stable
macro_rules! assert_matches {
($expr:expr, $($args:tt)+) => {
assert!(matches!($expr, $($args)*), "left matches right\n left: {:?}\nright: {}", &$expr, stringify!($($args)*))
};
}
#[test]
fn push_to_None_gives_singleton() {
let mut s: Option<String> = None;
option_push(&mut s, 'x');
assert_eq!(s, Some("x".into()));
}
#[test]
fn push_to_empty_appends() {
let mut s: Option<String> = Some(String::new());
option_push(&mut s, 'x');
assert_eq!(s, Some("x".into()));
}
#[test]
fn push_to_nonempty_appends() {
let mut s: Option<String> = Some(String::from("y"));
option_push(&mut s, 'x');
assert_eq!(s, Some("yx".into()));
}
fn opts() -> TokenizerOpts {
TokenizerOpts {
exact_errors: false,
discard_bom: true,
profile: false,
initial_state: None,
last_start_tag_name: None,
}
}
#[test]
fn check_lines() {
let opts = opts();
let vector = vec![
String::from("<a>\n"),
String::from("<b>\n"),
String::from("</b>\n"),
String::from("</a>\n"),
];
let results = tokenize(vector, opts);
assert_matches!(
&results[..],
[
(1, Token::TagToken(Tag{name: n1, kind: StartTag, ..})),
(2, CharacterTokens(c1)),
(2, Token::TagToken(Tag{name: n2, kind: StartTag, ..})),
(3, CharacterTokens(c2)),
(3, Token::TagToken(Tag{name: n3, kind: EndTag, ..})),
(4, CharacterTokens(c3)),
(4, Token::TagToken(Tag{name: n4, kind: EndTag, ..})),
(5, CharacterTokens(c4)),
] if
n1 == "a" && c1 == "\n" &&
n2 == "b" && c2 == "\n" &&
n3 == "b" && c3 == "\n" &&
n4 == "a" && c4 == "\n"
);
}
#[test]
fn check_lines_with_new_line() {
let opts = opts();
let vector = vec![
String::from("<a>\r\n"),
String::from("<b>\r\n"),
String::from("</b>\r\n"),
String::from("</a>\r\n"),
];
let results = tokenize(vector, opts);
assert_matches!(
&results[..],
[
(1, Token::TagToken(Tag{name: n1, kind: StartTag, ..})),
(2, CharacterTokens(c1)),
(2, Token::TagToken(Tag{name: n2, kind: StartTag, ..})),
(3, CharacterTokens(c2)),
(3, Token::TagToken(Tag{name: n3, kind: EndTag, ..})),
(4, CharacterTokens(c3)),
(4, Token::TagToken(Tag{name: n4, kind: EndTag, ..})),
(5, CharacterTokens(c4)),
] if
n1 == "a" && c1 == "\n" &&
n2 == "b" && c2 == "\n" &&
n3 == "b" && c3 == "\n" &&
n4 == "a" && c4 == "\n"
);
}
#[test]
#[cfg(not(feature = "named-entities"))]
fn named_entities() {
let opts = opts();
let vector = vec![String::from("&\r\n"), String::from("&aamp;\r\n")];
let expected = vec![(3, Token::CharacterTokens("&\n&aamp;\n".into()))];
let results = tokenize(vector, opts);
assert_eq!(results, expected);
}
#[test]
#[cfg(feature = "named-entities")]
fn named_entities() {
use crate::error::{CharRefError, Error};
let opts = opts();
let vector = vec![String::from("&\r\n"), String::from("&aamp;\r\n")];
let results = tokenize(vector, opts);
assert_matches!(
&results[..],
[
(3, CharacterTokens(c1)),
(
3,
ParseError{error: Error::CharRef(CharRefError::InvalidNamedCharRef), ..},
),
(4, CharacterTokens(c2)),
] if c1 == "&\n" && c2 == "&aamp;\n"
);
}
}