mod utils;

use crate::entities::try_read_character_reference;
use crate::offset::{Offset, Position};
use crate::token::AttrValueSyntax;
use crate::tokenizer::CdataAction;
use crate::{reader::Reader, Emitter, Error};
use utils::{
    ascii_digit_pat, control_pat, ctostr, noncharacter_pat, surrogate_pat, whitespace_pat,
};

pub use utils::State;

/// The mutable state of the tokenizer state machine that `consume` advances
/// one step at a time.
///
/// `R` is the input reader, `O` the offset type used for positions and `E`
/// the emitter that receives the tokenizer's output.
pub(super) struct Machine<R, O, E> {
    /// The tokenizer state that the next `consume` call dispatches on.
    pub(super) state: State,
    /// Receives everything the machine produces (characters, tags, comments,
    /// attributes and errors).
    pub(super) emitter: E,
    /// Scratch buffer: cleared when `</` is seen in the RCDATA, RAWTEXT and
    /// script data states, filled with the letters of a candidate end tag
    /// name, and compared against `"script"` in the double-escape states.
    temporary_buffer: String,
    /// The source of input characters and of the current position.
    reader: R,
    // NOTE(review): presumably holds characters pushed back via `unread_char`
    // so they are read again first — confirm against `read_char`/`unread_char`.
    to_reconsume: Stack2<Option<char>>,
    /// Accumulator for a numeric character reference; set to `0x110000` when
    /// the accumulation overflows.
    character_reference_code: u32,
    /// The state recorded just before switching to
    /// `State::CharacterReference` (e.g. from the data, RCDATA and quoted
    /// attribute value states).
    return_state: Option<State>,
    // NOTE(review): presumably the name of the tag currently being built —
    // confirm against `push_tag_name`/`init_start_tag`/`init_end_tag`.
    current_tag_name: String,
    // NOTE(review): presumably the name of the most recently emitted start tag,
    // consulted by `current_end_tag_is_appropriate` — confirm.
    pub(super) last_start_tag_name: String,
    // NOTE(review): presumably distinguishes a start tag from an end tag for
    // the tag currently being built — confirm against `init_start_tag`/`init_end_tag`.
    is_start_tag: bool,
    /// The reader position before the match block in [`consume`].
    position_before_match: O,
    /// * Set to the offset of `<` in [`State::Data`].
    /// * Set to the offset of `-` in [`State::Comment`].
    /// * Set to the offset of `&` in [`State::CharacterReference`].
    some_offset: O,
    /// This boolean flag exists so that the [`NaiveParser`](crate::NaiveParser) can work with any [`Emitter`]
    /// (it cannot call [`Tokenizer::set_state`] using the emitted start tags since they can be of an arbitrary type).
    ///
    /// [`Tokenizer::set_state`]: super::Tokenizer::set_state
    pub(crate) naively_switch_state: bool,
}

impl<R, O, E> Machine<R, O, E>
where
    R: Reader + Position<O>,
    O: Offset,
    E: Emitter<O>,
{
    /// Creates a machine that reads its input from `reader` and reports its
    /// output to `emitter`.
    ///
    /// The machine starts out in [`State::Data`] with empty buffers, no
    /// return state, a zero character reference code, default offsets and
    /// naive state switching disabled.
    pub fn new(reader: R, emitter: E) -> Self {
        // Initialisers follow the field order of the struct declaration.
        Self {
            state: State::Data,
            emitter,
            temporary_buffer: String::new(),
            reader,
            to_reconsume: Stack2::default(),
            character_reference_code: 0,
            return_state: None,
            current_tag_name: String::new(),
            last_start_tag_name: String::new(),
            is_start_tag: false,
            position_before_match: O::default(),
            some_offset: O::default(),
            naively_switch_state: false,
        }
    }
}

/// The outcome of a single `consume` step, returned to the caller.
pub enum ControlToken {
    /// The reader had no more characters to offer in the current state.
    Eof,
    /// The step completed (a character was handled and/or the state changed)
    /// without reaching the end of the input.
    Continue,
    // NOTE(review): not produced in the part of `consume` visible here —
    // presumably signals that a `<![CDATA[` opener was seen so the caller can
    // decide how to proceed (cf. `CdataAction`); confirm.
    CdataOpen,
}

#[inline]
pub(super) fn consume<O, R, E>(slf: &mut Machine<R, O, E>) -> Result<ControlToken, R::Error>
where
    O: Offset,
    R: Reader + Position<O>,
    E: Emitter<O>,
{
    // Folds one more digit into `slf.character_reference_code`:
    // `code = code * $mul + ($x as u32 - $sub)`, using checked arithmetic for
    // the multiplication and the addition.
    macro_rules! mutate_character_reference {
        (* $mul:literal + $x:ident - $sub:literal) => {
            slf.character_reference_code = slf
                .character_reference_code
                .checked_mul($mul)
                .and_then(|scaled| scaled.checked_add($x as u32 - $sub))
                // On overflow, store an out-of-range value (one past U+10FFFF)
                // to provoke an error.
                .unwrap_or(0x110000);
        };
    }

    slf.position_before_match = slf.reader.position();

    match slf.state {
        State::Data => match slf.read_char()? {
            Some('&') => {
                slf.return_state = Some(slf.state);
                slf.state = State::CharacterReference;
                Ok(ControlToken::Continue)
            }
            Some('<') => {
                slf.some_offset = slf.position_before_match;
                slf.state = State::TagOpen;
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emit_char('\0');
                Ok(ControlToken::Continue)
            }
            Some(x) => {
                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
            None => Ok(ControlToken::Eof),
        },
        State::RcData => match slf.read_char()? {
            Some('&') => {
                slf.return_state = Some(State::RcData);
                slf.state = State::CharacterReference;
                Ok(ControlToken::Continue)
            }
            Some('<') => {
                slf.state = State::RcDataLessThanSign;
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emit_char('\u{fffd}');
                Ok(ControlToken::Continue)
            }
            Some(x) => {
                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
            None => Ok(ControlToken::Eof),
        },
        State::RawText => match slf.read_char()? {
            Some('<') => {
                slf.state = State::RawTextLessThanSign;
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emit_char('\u{fffd}');
                Ok(ControlToken::Continue)
            }
            Some(x) => {
                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
            None => Ok(ControlToken::Eof),
        },
        State::ScriptData => match slf.read_char()? {
            Some('<') => {
                slf.state = State::ScriptDataLessThanSign;
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emit_char('\u{fffd}');
                Ok(ControlToken::Continue)
            }
            Some(x) => {
                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
            None => Ok(ControlToken::Eof),
        },
        State::PlainText => match slf.read_char()? {
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emit_char('\u{fffd}');
                Ok(ControlToken::Continue)
            }
            Some(x) => {
                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
            None => Ok(ControlToken::Eof),
        },
        State::TagOpen => match slf.read_char()? {
            Some('!') => {
                slf.state = State::MarkupDeclarationOpen;
                Ok(ControlToken::Continue)
            }
            Some('/') => {
                slf.state = State::EndTagOpen;
                Ok(ControlToken::Continue)
            }
            Some(x) if x.is_ascii_alphabetic() => {
                slf.init_start_tag();
                slf.state = State::TagName;
                slf.unread_char(Some(x));
                Ok(ControlToken::Continue)
            }
            c @ Some('?') => {
                slf.emit_error(Error::UnexpectedQuestionMarkInsteadOfTagName);
                slf.emitter.init_comment(slf.reader.position());
                slf.state = State::BogusComment;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofBeforeTagName);
                slf.emit_char('<');
                Ok(ControlToken::Eof)
            }
            c @ Some(_) => {
                slf.emit_error(Error::InvalidFirstCharacterOfTagName);
                slf.state = State::Data;
                slf.emit_char('<');
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::EndTagOpen => match slf.read_char()? {
            Some(x) if x.is_ascii_alphabetic() => {
                slf.init_end_tag();
                slf.state = State::TagName;
                slf.unread_char(Some(x));
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                slf.emit_error(Error::MissingEndTagName);
                slf.state = State::Data;
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofBeforeTagName);
                slf.emit_chars(b"</");
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                slf.emit_error(Error::InvalidFirstCharacterOfTagName);
                slf.emitter.init_comment(slf.reader.position());
                slf.state = State::BogusComment;
                slf.unread_char(Some(x));
                Ok(ControlToken::Continue)
            }
        },
        State::TagName => match slf.read_char()? {
            Some(whitespace_pat!()) => {
                slf.emitter.terminate_tag_name(slf.position_before_match);
                slf.state = State::BeforeAttributeName;
                Ok(ControlToken::Continue)
            }
            Some('/') => {
                slf.emitter.terminate_tag_name(slf.position_before_match);
                slf.state = State::SelfClosingStartTag;
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                slf.emitter.terminate_tag_name(slf.position_before_match);
                slf.state = State::Data;
                slf.emit_current_tag();
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.push_tag_name("\u{fffd}");
                Ok(ControlToken::Continue)
            }
            Some(x) => {
                slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInTag);
                Ok(ControlToken::Eof)
            }
        },
        State::RcDataLessThanSign => match slf.read_char()? {
            Some('/') => {
                slf.temporary_buffer.clear();
                slf.state = State::RcDataEndTagOpen;
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_char('<');
                slf.state = State::RcData;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::RcDataEndTagOpen => match slf.read_char()? {
            Some(x) if x.is_ascii_alphabetic() => {
                slf.init_end_tag();
                slf.state = State::RcDataEndTagName;
                slf.unread_char(Some(x));
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_chars(b"</");
                slf.state = State::RcData;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::RcDataEndTagName => match slf.read_char()? {
            Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
                slf.state = State::BeforeAttributeName;
                Ok(ControlToken::Continue)
            }
            Some('/') if slf.current_end_tag_is_appropriate() => {
                slf.state = State::SelfClosingStartTag;
                Ok(ControlToken::Continue)
            }
            Some('>') if slf.current_end_tag_is_appropriate() => {
                slf.state = State::Data;
                slf.emit_current_tag();
                Ok(ControlToken::Continue)
            }
            Some(x) if x.is_ascii_alphabetic() => {
                slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
                slf.temporary_buffer.push(x);
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_chars(b"</");
                slf.flush_buffer_characters();

                slf.state = State::RcData;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::RawTextLessThanSign => match slf.read_char()? {
            Some('/') => {
                slf.temporary_buffer.clear();
                slf.state = State::RawTextEndTagOpen;
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_char('<');
                slf.state = State::RawText;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::RawTextEndTagOpen => match slf.read_char()? {
            Some(x) if x.is_ascii_alphabetic() => {
                slf.init_end_tag();
                slf.state = State::RawTextEndTagName;
                slf.unread_char(Some(x));
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_chars(b"</");
                slf.state = State::RawText;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::RawTextEndTagName => match slf.read_char()? {
            Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
                slf.state = State::BeforeAttributeName;
                Ok(ControlToken::Continue)
            }
            Some('/') if slf.current_end_tag_is_appropriate() => {
                slf.state = State::SelfClosingStartTag;
                Ok(ControlToken::Continue)
            }
            Some('>') if slf.current_end_tag_is_appropriate() => {
                slf.state = State::Data;
                slf.emit_current_tag();
                Ok(ControlToken::Continue)
            }
            Some(x) if x.is_ascii_alphabetic() => {
                slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
                slf.temporary_buffer.push(x);
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_chars(b"</");
                slf.flush_buffer_characters();

                slf.state = State::RawText;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataLessThanSign => match slf.read_char()? {
            Some('/') => {
                slf.temporary_buffer.clear();
                slf.state = State::ScriptDataEndTagOpen;
                Ok(ControlToken::Continue)
            }
            Some('!') => {
                slf.state = State::ScriptDataEscapeStart;
                slf.emit_chars(b"<!");
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_char('<');
                slf.state = State::ScriptData;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataEndTagOpen => match slf.read_char()? {
            Some(x) if x.is_ascii_alphabetic() => {
                slf.init_end_tag();
                slf.state = State::ScriptDataEndTagName;
                slf.unread_char(Some(x));
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_chars(b"</");
                slf.state = State::ScriptData;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataEndTagName => match slf.read_char()? {
            Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
                slf.state = State::BeforeAttributeName;
                Ok(ControlToken::Continue)
            }
            Some('/') if slf.current_end_tag_is_appropriate() => {
                slf.state = State::SelfClosingStartTag;
                Ok(ControlToken::Continue)
            }
            Some('>') if slf.current_end_tag_is_appropriate() => {
                slf.state = State::Data;
                slf.emit_current_tag();
                Ok(ControlToken::Continue)
            }
            Some(x) if x.is_ascii_alphabetic() => {
                slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
                slf.temporary_buffer.push(x.to_ascii_lowercase());
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_chars(b"</");
                slf.flush_buffer_characters();
                slf.state = State::Data;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataEscapeStart => match slf.read_char()? {
            Some('-') => {
                slf.state = State::ScriptDataEscapeStartDash;
                slf.emit_char('-');
                Ok(ControlToken::Continue)
            }
            c => {
                slf.state = State::ScriptData;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataEscapeStartDash => match slf.read_char()? {
            Some('-') => {
                slf.state = State::ScriptDataEscapedDashDash;
                slf.emit_char('-');
                Ok(ControlToken::Continue)
            }
            c => {
                slf.state = State::ScriptData;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataEscaped => match slf.read_char()? {
            Some('-') => {
                slf.state = State::ScriptDataEscapedDash;
                slf.emit_char('-');
                Ok(ControlToken::Continue)
            }
            Some('<') => {
                slf.state = State::ScriptDataEscapedLessThanSign;
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emit_char('\u{fffd}');
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataEscapedDash => match slf.read_char()? {
            Some('-') => {
                slf.state = State::ScriptDataEscapedDashDash;
                slf.emit_char('-');
                Ok(ControlToken::Continue)
            }
            Some('<') => {
                slf.state = State::ScriptDataEscapedLessThanSign;
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.state = State::ScriptDataEscaped;
                slf.emit_char('\u{fffd}');
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                slf.state = State::ScriptDataEscaped;
                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataEscapedDashDash => match slf.read_char()? {
            Some('-') => {
                slf.emit_char('-');
                Ok(ControlToken::Continue)
            }
            Some('<') => {
                slf.state = State::ScriptDataEscapedLessThanSign;
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                slf.state = State::ScriptData;
                slf.emit_char('>');
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.state = State::ScriptDataEscaped;
                slf.emit_char('\u{fffd}');
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                slf.state = State::ScriptDataEscaped;
                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataEscapedLessThanSign => match slf.read_char()? {
            Some('/') => {
                slf.temporary_buffer.clear();
                slf.state = State::ScriptDataEscapedEndTagOpen;
                Ok(ControlToken::Continue)
            }
            Some(x) if x.is_ascii_alphabetic() => {
                slf.temporary_buffer.clear();
                slf.emit_char('<');
                slf.state = State::ScriptDataDoubleEscapeStart;
                slf.unread_char(Some(x));
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_char('<');
                slf.state = State::ScriptDataEscaped;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataEscapedEndTagOpen => match slf.read_char()? {
            Some(x) if x.is_ascii_alphabetic() => {
                slf.init_end_tag();
                slf.state = State::ScriptDataEscapedEndTagName;
                slf.unread_char(Some(x));
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_chars(b"</");
                slf.unread_char(c);
                slf.state = State::ScriptDataEscaped;
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataEscapedEndTagName => match slf.read_char()? {
            Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
                slf.state = State::BeforeAttributeName;
                Ok(ControlToken::Continue)
            }
            Some('/') if slf.current_end_tag_is_appropriate() => {
                slf.state = State::SelfClosingStartTag;
                Ok(ControlToken::Continue)
            }
            Some('>') if slf.current_end_tag_is_appropriate() => {
                slf.state = State::Data;
                slf.emit_current_tag();
                Ok(ControlToken::Continue)
            }
            Some(x) if x.is_ascii_alphabetic() => {
                slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
                slf.temporary_buffer.push(x);
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_chars(b"</");
                slf.flush_buffer_characters();
                slf.state = State::ScriptDataEscaped;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataDoubleEscapeStart => match slf.read_char()? {
            Some(x @ whitespace_pat!() | x @ '/' | x @ '>') => {
                if slf.temporary_buffer == "script" {
                    slf.state = State::ScriptDataDoubleEscaped;
                } else {
                    slf.state = State::ScriptDataEscaped;
                }
                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
            Some(x) if x.is_ascii_alphabetic() => {
                slf.temporary_buffer.push(x.to_ascii_lowercase());
                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
            c => {
                slf.state = State::ScriptDataEscaped;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataDoubleEscaped => match slf.read_char()? {
            Some('-') => {
                slf.state = State::ScriptDataDoubleEscapedDash;
                slf.emit_char('-');
                Ok(ControlToken::Continue)
            }
            Some('<') => {
                slf.state = State::ScriptDataDoubleEscapedLessThanSign;
                slf.emit_char('<');
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emit_char('\u{fffd}');
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataDoubleEscapedDash => match slf.read_char()? {
            Some('-') => {
                slf.state = State::ScriptDataDoubleEscapedDashDash;
                slf.emit_char('-');
                Ok(ControlToken::Continue)
            }
            Some('<') => {
                slf.state = State::ScriptDataDoubleEscapedLessThanSign;
                slf.emit_char('<');
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.state = State::ScriptDataDoubleEscaped;
                slf.emit_char('\u{fffd}');
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                slf.state = State::ScriptDataDoubleEscaped;
                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataDoubleEscapedDashDash => match slf.read_char()? {
            Some('-') => {
                slf.emit_char('-');
                Ok(ControlToken::Continue)
            }
            Some('<') => {
                slf.emit_char('<');
                slf.state = State::ScriptDataDoubleEscapedLessThanSign;
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                slf.emit_char('>');
                slf.state = State::ScriptData;
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.state = State::ScriptDataDoubleEscaped;
                slf.emit_char('\u{fffd}');
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                slf.state = State::ScriptDataDoubleEscaped;
                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataDoubleEscapedLessThanSign => match slf.read_char()? {
            Some('/') => {
                slf.temporary_buffer.clear();
                slf.state = State::ScriptDataDoubleEscapeEnd;
                slf.emit_char('/');
                Ok(ControlToken::Continue)
            }
            c => {
                slf.state = State::ScriptDataDoubleEscaped;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::ScriptDataDoubleEscapeEnd => match slf.read_char()? {
            Some(x @ whitespace_pat!() | x @ '/' | x @ '>') => {
                if slf.temporary_buffer == "script" {
                    slf.state = State::ScriptDataEscaped;
                } else {
                    slf.state = State::ScriptDataDoubleEscaped;
                }

                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
            Some(x) if x.is_ascii_alphabetic() => {
                slf.temporary_buffer.push(x.to_ascii_lowercase());
                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
            c => {
                slf.state = State::ScriptDataDoubleEscaped;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::BeforeAttributeName => match slf.read_char()? {
            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
            c @ Some('/' | '>') | c @ None => {
                slf.state = State::AfterAttributeName;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
            Some('=') => {
                slf.emit_error(Error::UnexpectedEqualsSignBeforeAttributeName);
                slf.emitter.init_attribute_name(slf.reader.position());
                slf.emitter.push_attribute_name("=");
                slf.state = State::AttributeName;
                Ok(ControlToken::Continue)
            }
            Some(x) => {
                slf.emitter.init_attribute_name(slf.position_before_match);
                slf.state = State::AttributeName;
                slf.unread_char(Some(x));
                Ok(ControlToken::Continue)
            }
        },
        State::AttributeName => match slf.read_char()? {
            c @ Some(whitespace_pat!() | '/' | '>') | c @ None => {
                slf.emitter
                    .terminate_attribute_name(slf.position_before_match);
                slf.state = State::AfterAttributeName;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
            Some('=') => {
                slf.emitter
                    .terminate_attribute_name(slf.position_before_match);
                slf.state = State::BeforeAttributeValue;
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emitter.push_attribute_name("\u{fffd}");
                Ok(ControlToken::Continue)
            }
            Some(x @ '"' | x @ '\'' | x @ '<') => {
                slf.emit_error(Error::UnexpectedCharacterInAttributeName);
                slf.emitter
                    .push_attribute_name(ctostr!(x.to_ascii_lowercase()));
                Ok(ControlToken::Continue)
            }
            Some(x) => {
                slf.emitter
                    .push_attribute_name(ctostr!(x.to_ascii_lowercase()));
                Ok(ControlToken::Continue)
            }
        },
        State::AfterAttributeName => match slf.read_char()? {
            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
            Some('/') => {
                slf.state = State::SelfClosingStartTag;
                Ok(ControlToken::Continue)
            }
            Some('=') => {
                slf.state = State::BeforeAttributeValue;
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                slf.state = State::Data;
                slf.emit_current_tag();
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInTag);
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                slf.emitter.init_attribute_name(slf.position_before_match);
                slf.state = State::AttributeName;
                slf.unread_char(Some(x));
                Ok(ControlToken::Continue)
            }
        },
        State::BeforeAttributeValue => match slf.read_char()? {
            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
            Some('"') => {
                slf.emitter
                    .init_attribute_value(AttrValueSyntax::DoubleQuoted, slf.reader.position());
                slf.state = State::AttributeValueDoubleQuoted;
                Ok(ControlToken::Continue)
            }
            Some('\'') => {
                slf.emitter
                    .init_attribute_value(AttrValueSyntax::SingleQuoted, slf.reader.position());
                slf.state = State::AttributeValueSingleQuoted;
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                slf.emit_error(Error::MissingAttributeValue);
                slf.state = State::Data;
                slf.emit_current_tag();
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emitter
                    .init_attribute_value(AttrValueSyntax::Unquoted, slf.position_before_match);
                slf.state = State::AttributeValueUnquoted;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        // Inside a `"`-delimited attribute value: accumulates characters until
        // the closing quote.
        State::AttributeValueDoubleQuoted => match slf.read_char()? {
            Some('"') => {
                // Closing quote: the value ends at the offset of the quote itself.
                slf.emitter.terminate_attribute_value(
                    // We cannot simply pass slf.position_before_match because
                    // State::NamedCharacterReference calls Tokenizer::unread_char
                    // which Reader::position doesn't account for.
                    // TODO: pass slf.position_before_match once CharacterReference has been converted to a function call
                    slf.reader.position() - slf.reader.len_of_char_in_current_encoding('"'),
                );
                slf.state = State::AfterAttributeValueQuoted;
                Ok(ControlToken::Continue)
            }
            Some('&') => {
                // Remember which state to resume once the character reference
                // has been processed.
                slf.return_state = Some(State::AttributeValueDoubleQuoted);
                slf.state = State::CharacterReference;
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                // NUL is reported and replaced by U+FFFD in the value.
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emitter.push_attribute_value("\u{fffd}");
                Ok(ControlToken::Continue)
            }
            None => {
                // End of input inside the tag: only the error is reported here,
                // this arm does not emit the tag.
                slf.emit_error(Error::EofInTag);
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                // Every other character is appended to the value verbatim.
                slf.emitter.push_attribute_value(ctostr!(x));
                Ok(ControlToken::Continue)
            }
        },
        // Inside a `'`-delimited attribute value: accumulates characters until
        // the closing quote.
        State::AttributeValueSingleQuoted => match slf.read_char()? {
            Some('\'') => {
                // Closing quote: the value ends at the offset of the quote itself.
                slf.emitter.terminate_attribute_value(
                    // We cannot simply pass slf.position_before_match because
                    // State::NamedCharacterReference calls Tokenizer::unread_char
                    // which Reader::position doesn't account for.
                    // TODO: pass slf.position_before_match once CharacterReference has been converted to a function call
                    slf.reader.position() - slf.reader.len_of_char_in_current_encoding('\''),
                );
                slf.state = State::AfterAttributeValueQuoted;
                Ok(ControlToken::Continue)
            }
            Some('&') => {
                // Remember which state to resume once the character reference
                // has been processed.
                slf.return_state = Some(State::AttributeValueSingleQuoted);
                slf.state = State::CharacterReference;
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                // NUL is reported and replaced by U+FFFD in the value.
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emitter.push_attribute_value("\u{fffd}");
                Ok(ControlToken::Continue)
            }
            None => {
                // End of input inside the tag: only the error is reported here,
                // this arm does not emit the tag.
                slf.emit_error(Error::EofInTag);
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                // Every other character is appended to the value verbatim.
                slf.emitter.push_attribute_value(ctostr!(x));
                Ok(ControlToken::Continue)
            }
        },
        // Inside an unquoted attribute value. The value ends at the first
        // whitespace character or at `>`; in both cases the emitter is told
        // where the value ends before the state machine moves on.
        State::AttributeValueUnquoted => match slf.read_char()? {
            Some(whitespace_pat!()) => {
                // The value ends at the offset of the whitespace character.
                slf.emitter.terminate_attribute_value(
                    // We cannot simply pass slf.position_before_match because
                    // State::NamedCharacterReference calls Tokenizer::unread_char
                    // which Reader::position doesn't account for.
                    // TODO: pass slf.position_before_match once CharacterReference has been converted to a function call
                    slf.reader.position() - slf.reader.len_of_char_in_current_encoding(' '),
                );
                slf.state = State::BeforeAttributeName;
                Ok(ControlToken::Continue)
            }
            Some('&') => {
                // Remember which state to resume once the character reference
                // has been processed.
                slf.return_state = Some(State::AttributeValueUnquoted);
                slf.state = State::CharacterReference;
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                // `>` ends the value as well as the tag, so the value has to be
                // terminated (at the offset of the `>`) before the tag is emitted,
                // just like in the whitespace arm and the quoted value states.
                slf.emitter.terminate_attribute_value(
                    // Computed from Reader::position for the same reason as in
                    // the whitespace arm above.
                    slf.reader.position() - slf.reader.len_of_char_in_current_encoding('>'),
                );
                slf.state = State::Data;
                slf.emit_current_tag();
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                // NUL is reported and replaced by U+FFFD in the value.
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emitter.push_attribute_value("\u{fffd}");
                Ok(ControlToken::Continue)
            }
            Some(x @ '"' | x @ '\'' | x @ '<' | x @ '=' | x @ '\u{60}') => {
                // These characters are reported but still become part of the value.
                slf.emit_error(Error::UnexpectedCharacterInUnquotedAttributeValue);
                slf.emitter.push_attribute_value(ctostr!(x));
                Ok(ControlToken::Continue)
            }
            None => {
                // End of input inside the tag: only the error is reported here,
                // this arm does not emit the tag.
                slf.emit_error(Error::EofInTag);
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                // Every other character is appended to the value verbatim.
                slf.emitter.push_attribute_value(ctostr!(x));
                Ok(ControlToken::Continue)
            }
        },
        // Directly after the closing quote of a quoted attribute value.
        State::AfterAttributeValueQuoted => match slf.read_char()? {
            Some(whitespace_pat!()) => {
                // Whitespace separates this attribute from whatever follows.
                slf.state = State::BeforeAttributeName;
                Ok(ControlToken::Continue)
            }
            Some('/') => {
                slf.state = State::SelfClosingStartTag;
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                // End of the tag: switch back to `Data` and emit the tag.
                slf.state = State::Data;
                slf.emit_current_tag();
                Ok(ControlToken::Continue)
            }
            None => {
                // End of input inside the tag: only the error is reported here.
                slf.emit_error(Error::EofInTag);
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                // Any other character directly after the quote is reported, then
                // pushed back so `BeforeAttributeName` handles it.
                slf.emit_error(Error::MissingWhitespaceBetweenAttributes);
                slf.state = State::BeforeAttributeName;
                slf.unread_char(Some(x));
                Ok(ControlToken::Continue)
            }
        },
        // After a `/` inside a tag (entered e.g. from `AfterAttributeValueQuoted`).
        State::SelfClosingStartTag => match slf.read_char()? {
            Some('>') => {
                // `/>`: mark the tag as self-closing. The range passed covers one
                // `/`-sized character ending at the offset of the `>`.
                slf.emitter.set_self_closing(
                    slf.position_before_match - slf.reader.len_of_char_in_current_encoding('/')
                        ..slf.position_before_match,
                );
                slf.state = State::Data;
                slf.emit_current_tag();
                Ok(ControlToken::Continue)
            }
            None => {
                // End of input inside the tag: only the error is reported here.
                slf.emit_error(Error::EofInTag);
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                // The `/` was not followed by `>`: report it and let
                // `BeforeAttributeName` process the character again.
                slf.emit_error(Error::UnexpectedSolidusInTag);
                slf.state = State::BeforeAttributeName;
                slf.unread_char(Some(x));
                Ok(ControlToken::Continue)
            }
        },
        // Bogus comment: everything up to the next `>` (or end of input) becomes
        // comment data.
        State::BogusComment => match slf.read_char()? {
            Some('>') => {
                // The comment ends at the offset of the `>`.
                slf.state = State::Data;
                slf.emitter.emit_current_comment(slf.position_before_match);
                Ok(ControlToken::Continue)
            }
            None => {
                // End of input also terminates the comment; no error is reported
                // by this arm.
                slf.emitter.emit_current_comment(slf.position_before_match);
                Ok(ControlToken::Eof)
            }
            Some('\0') => {
                // NUL is reported and replaced by U+FFFD in the comment data.
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emitter.push_comment("\u{fffd}");
                Ok(ControlToken::Continue)
            }
            Some(x) => {
                slf.emitter.push_comment(ctostr!(x));
                Ok(ControlToken::Continue)
            }
        },
        // Markup declaration open: distinguishes comments, doctypes and CDATA
        // sections. Each guarded arm only matches if the rest of the keyword
        // could be read via `try_read_string`; otherwise matching falls through
        // to the catch-all arm. The boolean argument presumably selects
        // case-sensitive matching (`true`) — TODO confirm against `try_read_string`.
        State::MarkupDeclarationOpen => match slf.read_char()? {
            Some('-') if slf.try_read_string("-", true)? => {
                // `--`: a comment whose data starts at the current reader position.
                slf.emitter.init_comment(slf.reader.position());
                slf.state = State::CommentStart;
                Ok(ControlToken::Continue)
            }
            Some('d' | 'D') if slf.try_read_string("octype", false)? => {
                slf.state = State::Doctype;
                Ok(ControlToken::Continue)
            }
            // The caller decides how to treat the CDATA section opener.
            Some('[') if slf.try_read_string("CDATA[", true)? => Ok(ControlToken::CdataOpen),
            c => {
                // Anything else is reported and treated as a bogus comment that
                // starts at the character just read, which is pushed back.
                slf.emit_error(Error::IncorrectlyOpenedComment);
                slf.emitter.init_comment(slf.position_before_match);
                slf.state = State::BogusComment;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        // Comment start: looks at the first character of the comment. A dash
        // might begin an early terminator, `>` closes an empty comment abruptly,
        // and everything else belongs to the `Comment` state.
        State::CommentStart => {
            let next = slf.read_char()?;
            match next {
                Some('-') => {
                    slf.state = State::CommentStartDash;
                }
                Some('>') => {
                    // Empty comment closed right away; it ends at the `>`.
                    slf.emit_error(Error::AbruptClosingOfEmptyComment);
                    slf.state = State::Data;
                    slf.emitter.emit_current_comment(slf.position_before_match);
                }
                _ => {
                    // Includes end of input: hand the character back and let
                    // `Comment` deal with it.
                    slf.unread_char(next);
                    slf.state = State::Comment;
                }
            }
            Ok(ControlToken::Continue)
        }
        // Comment start dash: the comment began with a single `-`.
        State::CommentStartDash => match slf.read_char()? {
            Some('-') => {
                slf.state = State::CommentEnd;
                Ok(ControlToken::Continue)
            }
            Some(c @ '>') => {
                // The comment is closed abruptly. The end offset passed is one
                // character before the `>`, i.e. where the preceding dash was read
                // (the length of `>` is used for the subtraction).
                slf.emit_error(Error::AbruptClosingOfEmptyComment);
                slf.state = State::Data;
                slf.emitter.emit_current_comment(
                    slf.position_before_match - slf.reader.len_of_char_in_current_encoding(c),
                );
                Ok(ControlToken::Continue)
            }
            None => {
                // End of input: the comment is emitted with an end offset one
                // `-`-sized character before `position_before_match`.
                slf.emit_error(Error::EofInComment);
                slf.emitter.emit_current_comment(
                    slf.position_before_match - slf.reader.len_of_char_in_current_encoding('-'),
                );
                Ok(ControlToken::Eof)
            }
            c @ Some(_) => {
                // The dash was ordinary comment data after all: add it and let
                // `Comment` process the character that was just read.
                slf.emitter.push_comment("-");
                slf.unread_char(c);
                slf.state = State::Comment;
                Ok(ControlToken::Continue)
            }
        },
        // Comment: accumulates comment data character by character.
        State::Comment => match slf.read_char()? {
            Some('<') => {
                // `<` is comment data, but a following `!--` is checked for by
                // the `CommentLessThanSign*` states.
                slf.emitter.push_comment("<");
                slf.state = State::CommentLessThanSign;
                Ok(ControlToken::Continue)
            }
            Some('-') => {
                // Record the offset of this dash: the comment end states pass
                // `some_offset` to `emit_current_comment` when the comment closes.
                slf.some_offset = slf.position_before_match;
                slf.state = State::CommentEndDash;
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                // NUL is reported and replaced by U+FFFD in the comment data.
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emitter.push_comment("\u{fffd}");
                Ok(ControlToken::Continue)
            }
            None => {
                // Unterminated comment: it is emitted with the current reader
                // position as its end.
                slf.emit_error(Error::EofInComment);
                slf.emitter.emit_current_comment(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                slf.emitter.push_comment(ctostr!(x));
                Ok(ControlToken::Continue)
            }
        },
        // A `<` was just added to the comment data. `!` and further `<`
        // characters are added as well; anything else goes back to `Comment`.
        State::CommentLessThanSign => {
            let next = slf.read_char()?;
            match next {
                Some('!') => {
                    slf.emitter.push_comment("!");
                    slf.state = State::CommentLessThanSignBang;
                }
                Some('<') => {
                    // Stay in this state for runs of `<`.
                    slf.emitter.push_comment("<");
                }
                _ => {
                    // Includes end of input: reprocess in `Comment`.
                    slf.unread_char(next);
                    slf.state = State::Comment;
                }
            }
            Ok(ControlToken::Continue)
        }
        // `<!` was seen inside a comment: a dash continues the check for a
        // nested comment opener, anything else is reprocessed in `Comment`.
        State::CommentLessThanSignBang => {
            let next = slf.read_char()?;
            if matches!(next, Some('-')) {
                slf.state = State::CommentLessThanSignBangDash;
            } else {
                slf.unread_char(next);
                slf.state = State::Comment;
            }
            Ok(ControlToken::Continue)
        }
        // `<!-` was seen inside a comment: a second dash completes `<!--`,
        // anything else is reprocessed in `CommentEndDash` (the dash that was
        // consumed may begin the comment terminator).
        State::CommentLessThanSignBangDash => {
            let next = slf.read_char()?;
            if matches!(next, Some('-')) {
                slf.state = State::CommentLessThanSignBangDashDash;
            } else {
                slf.unread_char(next);
                slf.state = State::CommentEndDash;
            }
            Ok(ControlToken::Continue)
        }
        // `<!--` was seen inside a comment. The character is always pushed back
        // and reprocessed in `CommentEnd`; unless it is `>` or end of input, the
        // nested comment opener is reported first.
        State::CommentLessThanSignBangDashDash => {
            let next = slf.read_char()?;
            let closes_comment = matches!(next, Some('>') | None);
            if !closes_comment {
                slf.emit_error(Error::NestedComment);
            }
            slf.unread_char(next);
            slf.state = State::CommentEnd;
            Ok(ControlToken::Continue)
        }
        // A single `-` was seen inside the comment; it is only comment data if
        // no second dash follows.
        State::CommentEndDash => match slf.read_char()? {
            None => {
                // Unterminated comment: emitted with the recorded `some_offset`
                // as its end.
                slf.emit_error(Error::EofInComment);
                slf.emitter.emit_current_comment(slf.some_offset);
                Ok(ControlToken::Eof)
            }
            Some('-') => {
                slf.state = State::CommentEnd;
                Ok(ControlToken::Continue)
            }
            Some(other) => {
                // The pending dash turns out to be data; the character after it
                // is reprocessed in `Comment`.
                slf.emitter.push_comment("-");
                slf.unread_char(Some(other));
                slf.state = State::Comment;
                Ok(ControlToken::Continue)
            }
        },
        // Comment end: `--` has been seen.
        // NOTE(review): within the visible code `some_offset` is only assigned in
        // `State::Comment`; when this state is reached via `CommentStartDash`
        // (e.g. for `<!---->`) it looks like it may still hold an older offset —
        // confirm.
        State::CommentEnd => match slf.read_char()? {
            Some('>') => {
                // `-->`: the comment is complete and ends at `some_offset`.
                slf.state = State::Data;
                slf.emitter.emit_current_comment(slf.some_offset);
                Ok(ControlToken::Continue)
            }
            Some('!') => {
                slf.state = State::CommentEndBang;
                Ok(ControlToken::Continue)
            }
            Some('-') => {
                // A further dash: one dash becomes comment data while the state
                // is kept.
                slf.emitter.push_comment("-");
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInComment);
                slf.emitter.emit_current_comment(slf.some_offset);
                Ok(ControlToken::Eof)
            }
            c @ Some(_) => {
                // The two dashes were ordinary data: add them and reprocess the
                // current character in `Comment`.
                slf.emitter.push_comment("-");
                slf.emitter.push_comment("-");
                slf.unread_char(c);
                slf.state = State::Comment;
                Ok(ControlToken::Continue)
            }
        },
        // Comment end bang: `--!` has been seen.
        State::CommentEndBang => match slf.read_char()? {
            Some('-') => {
                // `--!` was data after all; the dash just read may start a new
                // terminator, hence `CommentEndDash`.
                slf.emitter.push_comment("-");
                slf.emitter.push_comment("-");
                slf.emitter.push_comment("!");
                slf.state = State::CommentEndDash;
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                // `--!>` closes the comment, but is reported as an error.
                slf.emit_error(Error::IncorrectlyClosedComment);
                slf.state = State::Data;
                slf.emitter.emit_current_comment(slf.some_offset);
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInComment);
                slf.emitter.emit_current_comment(slf.some_offset);
                Ok(ControlToken::Eof)
            }
            c @ Some(_) => {
                // `--!` was data: add it and reprocess the current character in
                // `Comment`.
                slf.emitter.push_comment("-");
                slf.emitter.push_comment("-");
                slf.emitter.push_comment("!");
                slf.state = State::Comment;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        // Doctype: entered after the `doctype` keyword. Whitespace is expected
        // before the name; `>` and any other character are handed to
        // `BeforeDoctypeName`, the latter with an error.
        State::Doctype => match slf.read_char()? {
            None => {
                // End of input right after the keyword: an empty doctype with
                // force-quirks set is emitted.
                slf.emit_error(Error::EofInDoctype);
                slf.init_doctype();
                slf.emitter.set_force_quirks();
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            Some(whitespace_pat!()) => {
                slf.state = State::BeforeDoctypeName;
                Ok(ControlToken::Continue)
            }
            other => {
                // Only a `>` may follow the keyword without whitespace and
                // without being reported.
                if !matches!(other, Some('>')) {
                    slf.emit_error(Error::MissingWhitespaceBeforeDoctypeName);
                }
                slf.unread_char(other);
                slf.state = State::BeforeDoctypeName;
                Ok(ControlToken::Continue)
            }
        },
        // Before doctype name: skips whitespace and creates the doctype token as
        // soon as the first significant character (or end of input) is seen.
        State::BeforeDoctypeName => match slf.read_char()? {
            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
            Some('\0') => {
                // A NUL starts the name, but is reported and replaced by U+FFFD.
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.init_doctype();
                slf.emitter.init_doctype_name(slf.position_before_match);
                slf.emitter.push_doctype_name("\u{fffd}");
                slf.state = State::DoctypeName;
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                // Doctype without a name: emitted with force-quirks set.
                slf.emit_error(Error::MissingDoctypeName);
                slf.init_doctype();
                slf.emitter.set_force_quirks();
                slf.state = State::Data;
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInDoctype);
                slf.init_doctype();
                slf.emitter.set_force_quirks();
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                // First character of the name; the name starts at its offset and
                // is stored ASCII-lowercased.
                slf.init_doctype();
                slf.emitter.init_doctype_name(slf.position_before_match);
                slf.emitter
                    .push_doctype_name(ctostr!(x.to_ascii_lowercase()));
                slf.state = State::DoctypeName;
                Ok(ControlToken::Continue)
            }
        },
        // Doctype name: accumulates the name until whitespace, `>` or end of input.
        State::DoctypeName => match slf.read_char()? {
            Some(whitespace_pat!()) => {
                // The name ends at the offset of the whitespace character.
                slf.emitter
                    .terminate_doctype_name(slf.position_before_match);
                slf.state = State::AfterDoctypeName;
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                // The name ends at the `>`, which also completes the doctype.
                slf.emitter
                    .terminate_doctype_name(slf.position_before_match);
                slf.state = State::Data;
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                // NUL is reported and replaced by U+FFFD in the name.
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emitter.push_doctype_name("\u{fffd}");
                Ok(ControlToken::Continue)
            }
            None => {
                // Unterminated doctype: the name is closed and the doctype is
                // emitted with force-quirks set.
                slf.emit_error(Error::EofInDoctype);
                slf.emitter
                    .terminate_doctype_name(slf.position_before_match);
                slf.emitter.set_force_quirks();
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                // Name characters are stored ASCII-lowercased.
                slf.emitter
                    .push_doctype_name(ctostr!(x.to_ascii_lowercase()));
                Ok(ControlToken::Continue)
            }
        },
        // After doctype name: expects `>`, or one of the keywords `public` /
        // `system`. The keyword arms only match if the rest of the keyword could
        // be read via `try_read_string`; otherwise the character falls through
        // to the catch-all arm.
        State::AfterDoctypeName => match slf.read_char()? {
            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
            Some('>') => {
                slf.state = State::Data;
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInDoctype);
                slf.emitter.set_force_quirks();
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            Some('p' | 'P') if slf.try_read_string("ublic", false)? => {
                slf.state = State::AfterDoctypePublicKeyword;
                Ok(ControlToken::Continue)
            }
            Some('s' | 'S') if slf.try_read_string("ystem", false)? => {
                slf.state = State::AfterDoctypeSystemKeyword;
                Ok(ControlToken::Continue)
            }
            c @ Some(_) => {
                // Anything else makes the doctype bogus: force-quirks is set and
                // the character is reprocessed in `BogusDoctype`.
                slf.emit_error(Error::InvalidCharacterSequenceAfterDoctypeName);
                slf.emitter.set_force_quirks();
                slf.unread_char(c);
                slf.state = State::BogusDoctype;
                Ok(ControlToken::Continue)
            }
        },
        // Directly after the `public` keyword of a doctype.
        State::AfterDoctypePublicKeyword => match slf.read_char()? {
            Some(whitespace_pat!()) => {
                slf.state = State::BeforeDoctypePublicIdentifier;
                Ok(ControlToken::Continue)
            }
            Some('"') => {
                // A quote directly after the keyword is accepted, but the missing
                // whitespace is reported. The identifier starts at the current
                // reader position.
                slf.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);
                slf.emitter.init_doctype_public_id(slf.reader.position());
                slf.state = State::DoctypePublicIdentifierDoubleQuoted;
                Ok(ControlToken::Continue)
            }
            Some('\'') => {
                // Same as above, for a single-quoted identifier.
                slf.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);
                slf.emitter.init_doctype_public_id(slf.reader.position());
                slf.state = State::DoctypePublicIdentifierSingleQuoted;
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                // Keyword without identifier: the doctype is emitted with
                // force-quirks set.
                slf.emit_error(Error::MissingDoctypePublicIdentifier);
                slf.emitter.set_force_quirks();
                slf.state = State::Data;
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInDoctype);
                slf.emitter.set_force_quirks();
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            c @ Some(_) => {
                // Unquoted identifier: the doctype becomes bogus and the
                // character is reprocessed in `BogusDoctype`.
                slf.emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier);
                slf.emitter.set_force_quirks();
                slf.unread_char(c);
                slf.state = State::BogusDoctype;
                Ok(ControlToken::Continue)
            }
        },
        // Whitespace has followed the `public` keyword; a quoted public
        // identifier is expected next.
        State::BeforeDoctypePublicIdentifier => match slf.read_char()? {
            None => {
                slf.emit_error(Error::EofInDoctype);
                slf.emitter.set_force_quirks();
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
            Some(quote @ ('"' | '\'')) => {
                // The identifier starts at the current reader position; the kind
                // of quote selects the state that reads it.
                slf.emitter.init_doctype_public_id(slf.reader.position());
                slf.state = match quote {
                    '"' => State::DoctypePublicIdentifierDoubleQuoted,
                    _ => State::DoctypePublicIdentifierSingleQuoted,
                };
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                // No identifier at all: emit the doctype with force-quirks set.
                slf.emit_error(Error::MissingDoctypePublicIdentifier);
                slf.emitter.set_force_quirks();
                slf.state = State::Data;
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Continue)
            }
            Some(other) => {
                // Unquoted identifier: the doctype becomes bogus and the
                // character is reprocessed in `BogusDoctype`.
                slf.emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier);
                slf.emitter.set_force_quirks();
                slf.unread_char(Some(other));
                slf.state = State::BogusDoctype;
                Ok(ControlToken::Continue)
            }
        },
        // Inside a `"`-delimited doctype public identifier.
        State::DoctypePublicIdentifierDoubleQuoted => match slf.read_char()? {
            Some('"') => {
                // Closing quote: the identifier ends at the offset of the quote.
                slf.emitter
                    .terminate_doctype_public_id(slf.position_before_match);
                slf.state = State::AfterDoctypePublicIdentifier;
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                // NUL is reported and replaced by U+FFFD in the identifier.
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emitter.push_doctype_public_id("\u{fffd}");
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                // `>` before the closing quote: the identifier ends at the `>` and
                // the doctype is emitted with force-quirks set.
                slf.emitter
                    .terminate_doctype_public_id(slf.position_before_match);
                slf.emit_error(Error::AbruptDoctypePublicIdentifier);
                slf.emitter.set_force_quirks();
                slf.state = State::Data;
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Continue)
            }
            None => {
                // End of input: the identifier ends at the current reader position.
                slf.emitter
                    .terminate_doctype_public_id(slf.reader.position());
                slf.emit_error(Error::EofInDoctype);
                slf.emitter.set_force_quirks();
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                slf.emitter.push_doctype_public_id(ctostr!(x));
                Ok(ControlToken::Continue)
            }
        },
        // Inside a `'`-delimited doctype public identifier.
        State::DoctypePublicIdentifierSingleQuoted => match slf.read_char()? {
            Some('\'') => {
                // Closing quote: the identifier ends at the offset of the quote.
                slf.emitter
                    .terminate_doctype_public_id(slf.position_before_match);
                slf.state = State::AfterDoctypePublicIdentifier;
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                // NUL is reported and replaced by U+FFFD in the identifier.
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emitter.push_doctype_public_id("\u{fffd}");
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                // `>` before the closing quote: the identifier ends at the `>` and
                // the doctype is emitted with force-quirks set.
                slf.emitter
                    .terminate_doctype_public_id(slf.position_before_match);
                slf.emit_error(Error::AbruptDoctypePublicIdentifier);
                slf.emitter.set_force_quirks();
                slf.state = State::Data;
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Continue)
            }
            None => {
                // End of input: the identifier ends at the current reader position.
                slf.emitter
                    .terminate_doctype_public_id(slf.reader.position());
                slf.emit_error(Error::EofInDoctype);
                slf.emitter.set_force_quirks();
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                slf.emitter.push_doctype_public_id(ctostr!(x));
                Ok(ControlToken::Continue)
            }
        },
        // Directly after the closing quote of the doctype public identifier.
        State::AfterDoctypePublicIdentifier => match slf.read_char()? {
            Some(whitespace_pat!()) => {
                slf.state = State::BetweenDoctypePublicAndSystemIdentifiers;
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                // Doctype with a public identifier only.
                slf.state = State::Data;
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Continue)
            }
            Some('"') => {
                // A system identifier follows without separating whitespace: it
                // is accepted, but reported. It starts at the current reader position.
                slf.emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
                slf.emitter.init_doctype_system_id(slf.reader.position());
                slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
                Ok(ControlToken::Continue)
            }
            Some('\'') => {
                // Same as above, for a single-quoted system identifier.
                slf.emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
                slf.emitter.init_doctype_system_id(slf.reader.position());
                slf.state = State::DoctypeSystemIdentifierSingleQuoted;
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInDoctype);
                slf.emitter.set_force_quirks();
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            c @ Some(_) => {
                // Unquoted system identifier: the doctype becomes bogus and the
                // character is reprocessed in `BogusDoctype`.
                slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
                slf.emitter.set_force_quirks();
                slf.unread_char(c);
                slf.state = State::BogusDoctype;
                Ok(ControlToken::Continue)
            }
        },
        // Whitespace has followed the public identifier; either the doctype ends
        // here or a quoted system identifier follows.
        State::BetweenDoctypePublicAndSystemIdentifiers => match slf.read_char()? {
            None => {
                slf.emit_error(Error::EofInDoctype);
                slf.emitter.set_force_quirks();
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
            Some('>') => {
                slf.state = State::Data;
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Continue)
            }
            Some(quote @ ('"' | '\'')) => {
                // The system identifier starts at the current reader position;
                // the kind of quote selects the state that reads it.
                slf.emitter.init_doctype_system_id(slf.reader.position());
                slf.state = match quote {
                    '"' => State::DoctypeSystemIdentifierDoubleQuoted,
                    _ => State::DoctypeSystemIdentifierSingleQuoted,
                };
                Ok(ControlToken::Continue)
            }
            Some(other) => {
                // Unquoted identifier: the doctype becomes bogus and the
                // character is reprocessed in `BogusDoctype`.
                slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
                slf.emitter.set_force_quirks();
                slf.state = State::BogusDoctype;
                slf.unread_char(Some(other));
                Ok(ControlToken::Continue)
            }
        },
        // Directly after the `system` keyword of a doctype.
        State::AfterDoctypeSystemKeyword => match slf.read_char()? {
            Some(whitespace_pat!()) => {
                slf.state = State::BeforeDoctypeSystemIdentifier;
                Ok(ControlToken::Continue)
            }
            Some('"') => {
                // A quote directly after the keyword is accepted, but the missing
                // whitespace is reported. The identifier starts at the current
                // reader position.
                slf.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);
                slf.emitter.init_doctype_system_id(slf.reader.position());
                slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
                Ok(ControlToken::Continue)
            }
            Some('\'') => {
                // Same as above, for a single-quoted identifier.
                slf.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);
                slf.emitter.init_doctype_system_id(slf.reader.position());
                slf.state = State::DoctypeSystemIdentifierSingleQuoted;
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                // Keyword without identifier: the doctype is emitted with
                // force-quirks set.
                slf.emit_error(Error::MissingDoctypeSystemIdentifier);
                slf.emitter.set_force_quirks();
                slf.state = State::Data;
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInDoctype);
                slf.emitter.set_force_quirks();
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            c @ Some(_) => {
                // Unquoted identifier: the doctype becomes bogus and the
                // character is reprocessed in `BogusDoctype`.
                slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
                slf.emitter.set_force_quirks();
                slf.state = State::BogusDoctype;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        // Whitespace has followed the `system` keyword; a quoted system
        // identifier is expected next.
        State::BeforeDoctypeSystemIdentifier => match slf.read_char()? {
            None => {
                slf.emit_error(Error::EofInDoctype);
                slf.emitter.set_force_quirks();
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
            Some(quote @ ('"' | '\'')) => {
                // The identifier starts at the current reader position; the kind
                // of quote selects the state that reads it.
                slf.emitter.init_doctype_system_id(slf.reader.position());
                slf.state = match quote {
                    '"' => State::DoctypeSystemIdentifierDoubleQuoted,
                    _ => State::DoctypeSystemIdentifierSingleQuoted,
                };
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                // No identifier at all: emit the doctype with force-quirks set.
                slf.emit_error(Error::MissingDoctypeSystemIdentifier);
                slf.emitter.set_force_quirks();
                slf.state = State::Data;
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Continue)
            }
            Some(other) => {
                // Unquoted identifier: the doctype becomes bogus and the
                // character is reprocessed in `BogusDoctype`.
                slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
                slf.emitter.set_force_quirks();
                slf.state = State::BogusDoctype;
                slf.unread_char(Some(other));
                Ok(ControlToken::Continue)
            }
        },
        State::DoctypeSystemIdentifierDoubleQuoted => match slf.read_char()? {
            Some('"') => {
                slf.emitter
                    .terminate_doctype_system_id(slf.position_before_match);
                slf.state = State::AfterDoctypeSystemIdentifier;
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emitter.push_doctype_system_id("\u{fffd}");
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                slf.emitter
                    .terminate_doctype_system_id(slf.position_before_match);
                slf.emit_error(Error::AbruptDoctypeSystemIdentifier);
                slf.emitter.set_force_quirks();
                slf.state = State::Data;
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emitter
                    .terminate_doctype_system_id(slf.reader.position());
                slf.emit_error(Error::EofInDoctype);
                slf.emitter.set_force_quirks();
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                slf.emitter.push_doctype_system_id(ctostr!(x));
                Ok(ControlToken::Continue)
            }
        },
        State::DoctypeSystemIdentifierSingleQuoted => match slf.read_char()? {
            Some('\'') => {
                slf.emitter
                    .terminate_doctype_system_id(slf.position_before_match);
                slf.state = State::AfterDoctypeSystemIdentifier;
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                slf.emitter.push_doctype_system_id("\u{fffd}");
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                slf.emitter
                    .terminate_doctype_system_id(slf.position_before_match);
                slf.emit_error(Error::AbruptDoctypeSystemIdentifier);
                slf.emitter.set_force_quirks();
                slf.state = State::Data;
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emitter
                    .terminate_doctype_system_id(slf.reader.position());
                slf.emit_error(Error::EofInDoctype);
                slf.emitter.set_force_quirks();
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                slf.emitter.push_doctype_system_id(ctostr!(x));
                Ok(ControlToken::Continue)
            }
        },
        State::AfterDoctypeSystemIdentifier => match slf.read_char()? {
            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
            Some('>') => {
                slf.state = State::Data;
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInDoctype);
                slf.emitter.set_force_quirks();
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            c @ Some(_) => {
                slf.emit_error(Error::UnexpectedCharacterAfterDoctypeSystemIdentifier);
                slf.unread_char(c);
                slf.state = State::BogusDoctype;
                Ok(ControlToken::Continue)
            }
        },
        State::BogusDoctype => match slf.read_char()? {
            Some('>') => {
                slf.state = State::Data;
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Continue)
            }
            Some('\0') => {
                slf.emit_error(Error::UnexpectedNullCharacter);
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emitter.emit_current_doctype(slf.reader.position());
                Ok(ControlToken::Eof)
            }
            Some(_) => Ok(ControlToken::Continue),
        },
        State::CdataSection => match slf.read_char()? {
            Some(']') => {
                slf.state = State::CdataSectionBracket;
                Ok(ControlToken::Continue)
            }
            None => {
                slf.emit_error(Error::EofInCdata);
                Ok(ControlToken::Eof)
            }
            Some(x) => {
                slf.emit_char(x);
                Ok(ControlToken::Continue)
            }
        },
        State::CdataSectionBracket => match slf.read_char()? {
            Some(']') => {
                slf.state = State::CdataSectionEnd;
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_char(']');
                slf.state = State::CdataSection;
                slf.unread_char(c);
                Ok(ControlToken::Continue)
            }
        },
        State::CdataSectionEnd => match slf.read_char()? {
            Some(']') => {
                slf.emit_char(']');
                Ok(ControlToken::Continue)
            }
            Some('>') => {
                slf.state = State::Data;
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_chars(b"]]");
                slf.unread_char(c);
                slf.state = State::CdataSection;
                Ok(ControlToken::Continue)
            }
        },
        State::CharacterReference => {
            // TODO: we can avoid these Reader method calls by changing CharacterReference to be a function instead of a state
            slf.some_offset =
                slf.reader.position() - slf.reader.len_of_char_in_current_encoding('&');
            slf.temporary_buffer.clear();
            slf.temporary_buffer.push('&');
            match slf.read_char()? {
                Some(x) if x.is_ascii_alphanumeric() => {
                    slf.unread_char(Some(x));
                    slf.state = State::NamedCharacterReference;
                    Ok(ControlToken::Continue)
                }
                Some('#') => {
                    slf.temporary_buffer.push('#');
                    slf.state = State::NumericCharacterReference;
                    Ok(ControlToken::Continue)
                }
                c => {
                    slf.flush_code_points_consumed_as_character_reference();
                    slf.state = slf.return_state.take().unwrap();
                    slf.unread_char(c);
                    Ok(ControlToken::Continue)
                }
            }
        }
        State::NamedCharacterReference => {
            let c = slf.read_char()?;

            let char_ref = match c {
                Some(x) => try_read_character_reference(x, |x| slf.try_read_string(x, true))?
                    .map(|char_ref| (x, char_ref)),

                None => None,
            };

            if let Some((x, char_ref)) = char_ref {
                slf.temporary_buffer.push(x);
                slf.temporary_buffer.push_str(char_ref.name);
                let char_ref_name_last_character = char_ref.name.chars().last();

                let next_character = slf.read_char()?;
                slf.unread_char(next_character);

                if slf.is_consumed_as_part_of_an_attribute()
                    && char_ref_name_last_character != Some(';')
                    && matches!(next_character, Some(x) if x == '=' || x.is_ascii_alphanumeric())
                {
                    slf.flush_code_points_consumed_as_character_reference();
                    slf.state = slf.return_state.take().unwrap();
                    Ok(ControlToken::Continue)
                } else {
                    if char_ref_name_last_character != Some(';') {
                        slf.emit_error(Error::MissingSemicolonAfterCharacterReference);
                    }

                    slf.temporary_buffer.clear();
                    slf.temporary_buffer.push_str(char_ref.characters);
                    slf.flush_code_points_consumed_as_character_reference();
                    slf.state = slf.return_state.take().unwrap();
                    Ok(ControlToken::Continue)
                }
            } else {
                slf.unread_char(c);
                slf.flush_code_points_consumed_as_character_reference();
                slf.state = State::AmbiguousAmpersand;
                Ok(ControlToken::Continue)
            }
        }
        State::AmbiguousAmpersand => match slf.read_char()? {
            Some(x) if x.is_ascii_alphanumeric() => {
                if slf.is_consumed_as_part_of_an_attribute() {
                    slf.emitter.push_attribute_value(ctostr!(x));
                } else {
                    slf.emit_char(x);
                }

                Ok(ControlToken::Continue)
            }
            c @ Some(';') => {
                slf.emit_error(Error::UnknownNamedCharacterReference);
                slf.unread_char(c);
                slf.state = slf.return_state.take().unwrap();
                Ok(ControlToken::Continue)
            }
            c => {
                slf.unread_char(c);
                slf.state = slf.return_state.take().unwrap();
                Ok(ControlToken::Continue)
            }
        },
        State::NumericCharacterReference => {
            slf.character_reference_code = 0;
            match slf.read_char()? {
                Some(x @ 'x' | x @ 'X') => {
                    slf.temporary_buffer.push(x);
                    slf.state = State::HexadecimalCharacterReferenceStart;
                    Ok(ControlToken::Continue)
                }
                c => {
                    slf.unread_char(c);
                    slf.state = State::DecimalCharacterReferenceStart;
                    Ok(ControlToken::Continue)
                }
            }
        }
        State::HexadecimalCharacterReferenceStart => match slf.read_char()? {
            c @ Some('0'..='9' | 'A'..='F' | 'a'..='f') => {
                slf.unread_char(c);
                slf.state = State::HexadecimalCharacterReference;
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_error(Error::AbsenceOfDigitsInNumericCharacterReference);
                slf.flush_code_points_consumed_as_character_reference();
                slf.unread_char(c);
                slf.state = slf.return_state.take().unwrap();
                Ok(ControlToken::Continue)
            }
        },
        State::DecimalCharacterReferenceStart => match slf.read_char()? {
            Some(x @ ascii_digit_pat!()) => {
                slf.unread_char(Some(x));
                slf.state = State::DecimalCharacterReference;
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_error(Error::AbsenceOfDigitsInNumericCharacterReference);
                slf.flush_code_points_consumed_as_character_reference();
                slf.unread_char(c);
                slf.state = slf.return_state.take().unwrap();
                Ok(ControlToken::Continue)
            }
        },
        State::HexadecimalCharacterReference => match slf.read_char()? {
            Some(x @ ascii_digit_pat!()) => {
                mutate_character_reference!(*16 + x - 0x0030);
                Ok(ControlToken::Continue)
            }
            Some(x @ 'A'..='F') => {
                mutate_character_reference!(*16 + x - 0x0037);
                Ok(ControlToken::Continue)
            }
            Some(x @ 'a'..='f') => {
                mutate_character_reference!(*16 + x - 0x0057);
                Ok(ControlToken::Continue)
            }
            Some(';') => {
                slf.state = State::NumericCharacterReferenceEnd;
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_error(Error::MissingSemicolonAfterCharacterReference);
                slf.unread_char(c);
                slf.state = State::NumericCharacterReferenceEnd;
                Ok(ControlToken::Continue)
            }
        },
        State::DecimalCharacterReference => match slf.read_char()? {
            Some(x @ ascii_digit_pat!()) => {
                mutate_character_reference!(*10 + x - 0x0030);
                Ok(ControlToken::Continue)
            }
            Some(';') => {
                slf.state = State::NumericCharacterReferenceEnd;
                Ok(ControlToken::Continue)
            }
            c => {
                slf.emit_error(Error::MissingSemicolonAfterCharacterReference);
                slf.unread_char(c);
                slf.state = State::NumericCharacterReferenceEnd;
                Ok(ControlToken::Continue)
            }
        },
        State::NumericCharacterReferenceEnd => {
            match slf.character_reference_code {
                0x00 => {
                    slf.emit_error(Error::NullCharacterReference);
                    slf.character_reference_code = 0xfffd;
                }
                0x110000.. => {
                    slf.emit_error(Error::CharacterReferenceOutsideUnicodeRange);
                    slf.character_reference_code = 0xfffd;
                }
                surrogate_pat!() => {
                    slf.emit_error(Error::SurrogateCharacterReference);
                    slf.character_reference_code = 0xfffd;
                }
                // noncharacter
                noncharacter_pat!() => {
                    slf.emit_error(Error::NoncharacterCharacterReference);
                }
                // 0x000d, or a control that is not whitespace
                x @ 0x000d | x @ control_pat!()
                    if !matches!(x, 0x0009 | 0x000a | 0x000c | 0x0020) =>
                {
                    slf.emit_error(Error::ControlCharacterReference);
                    slf.character_reference_code = match x {
                        0x80 => 0x20AC, // EURO SIGN (€)
                        0x82 => 0x201A, // SINGLE LOW-9 QUOTATION MARK (‚)
                        0x83 => 0x0192, // LATIN SMALL LETTER F WITH HOOK (ƒ)
                        0x84 => 0x201E, // DOUBLE LOW-9 QUOTATION MARK („)
                        0x85 => 0x2026, // HORIZONTAL ELLIPSIS (…)
                        0x86 => 0x2020, // DAGGER (†)
                        0x87 => 0x2021, // DOUBLE DAGGER (‡)
                        0x88 => 0x02C6, // MODIFIER LETTER CIRCUMFLEX ACCENT (ˆ)
                        0x89 => 0x2030, // PER MILLE SIGN (‰)
                        0x8A => 0x0160, // LATIN CAPITAL LETTER S WITH CARON (Š)
                        0x8B => 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK (‹)
                        0x8C => 0x0152, // LATIN CAPITAL LIGATURE OE (Œ)
                        0x8E => 0x017D, // LATIN CAPITAL LETTER Z WITH CARON (Ž)
                        0x91 => 0x2018, // LEFT SINGLE QUOTATION MARK (‘)
                        0x92 => 0x2019, // RIGHT SINGLE QUOTATION MARK (’)
                        0x93 => 0x201C, // LEFT DOUBLE QUOTATION MARK (“)
                        0x94 => 0x201D, // RIGHT DOUBLE QUOTATION MARK (”)
                        0x95 => 0x2022, // BULLET (•)
                        0x96 => 0x2013, // EN DASH (–)
                        0x97 => 0x2014, // EM DASH (—)
                        0x98 => 0x02DC, // SMALL TILDE (˜)
                        0x99 => 0x2122, // TRADE MARK SIGN (™)
                        0x9A => 0x0161, // LATIN SMALL LETTER S WITH CARON (š)
                        0x9B => 0x203A, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK (›)
                        0x9C => 0x0153, // LATIN SMALL LIGATURE OE (œ)
                        0x9E => 0x017E, // LATIN SMALL LETTER Z WITH CARON (ž)
                        0x9F => 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS (Ÿ)
                        _ => slf.character_reference_code,
                    };
                }
                _ => (),
            }

            slf.temporary_buffer.clear();
            slf.temporary_buffer
                .push(std::char::from_u32(slf.character_reference_code).unwrap());
            slf.flush_code_points_consumed_as_character_reference();
            slf.state = slf.return_state.take().unwrap();
            Ok(ControlToken::Continue)
        }
    }
}

/// Moves the machine into the state selected by `action`.
///
/// * [`CdataAction::Cdata`]: switches to [`State::CdataSection`]; nothing is emitted.
/// * [`CdataAction::BogusComment`]: reports [`Error::CdataInHtmlContent`], starts a
///   comment at the reader's current position, seeds it with the literal text
///   `[CDATA[` and switches to [`State::BogusComment`].
#[inline]
pub(super) fn handle_cdata_open<O, R, E>(slf: &mut Machine<R, O, E>, action: CdataAction)
where
    O: Offset,
    R: Reader + Position<O>,
    E: Emitter<O>,
{
    match action {
        CdataAction::BogusComment => {
            slf.emit_error(Error::CdataInHtmlContent);

            // The position is read only after the error has been reported.
            let comment_start = slf.reader.position();
            slf.emitter.init_comment(comment_start);
            slf.emitter.push_comment("[CDATA[");
            slf.state = State::BogusComment;
        }
        CdataAction::Cdata => {
            slf.state = State::CdataSection;
        }
    }
}

/// A tiny last-in-first-out stack with room for at most two `T`s.
///
/// Layout of the inner value:
/// * `None`: the stack is empty,
/// * `Some((bottom, None))`: one element,
/// * `Some((bottom, Some(top)))`: two elements (full).
#[derive(Debug, Default, Clone, Copy)]
struct Stack2<T: Copy>(Option<(T, Option<T>)>);

impl<T: Copy> Stack2<T> {
    /// Puts `c` on top of the stack.
    ///
    /// # Panics
    ///
    /// Panics with `"stack full!"` when two elements are already stored; the
    /// stack is left untouched in that case.
    #[inline]
    fn push(&mut self, c: T) {
        if let Some((_, upper)) = &mut self.0 {
            // One element is present already: the new one goes into the upper slot,
            // unless that slot is taken as well.
            if upper.is_some() {
                panic!("stack full!");
            }
            *upper = Some(c);
        } else {
            self.0 = Some((c, None));
        }
    }

    /// Removes and returns the most recently pushed element, or `None` when the
    /// stack is empty.
    #[inline]
    fn pop(&mut self) -> Option<T> {
        // An empty stack stays empty and yields nothing.
        let (bottom, upper) = self.0?;

        match upper {
            Some(top) => {
                // Two elements: hand out the top one and keep the bottom one.
                self.0 = Some((bottom, None));
                Some(top)
            }
            None => {
                // Single element: handing it out empties the stack.
                self.0 = None;
                Some(bottom)
            }
        }
    }
}