use crate::{
    naive_parser::naive_next_state,
    offset::{Offset, Position},
    reader::Reader,
    Emitter, Error,
};

use super::Machine;

impl<R, O, E> Machine<R, O, E>
where
    R: Reader + Position<O>,
    O: Offset,
    E: Emitter<O>,
{
    /// Emits the given character as a character token.
    #[inline]
    pub(super) fn emit_char(&mut self, c: char) {
        self.emitter.emit_string(ctostr!(c));
    }

    /// Emits the given byte slice as a string of character tokens.
    ///
    /// (We're operating on bytes to enable compiler optimization,
    /// since [`str::chars`] isn't `const`.)
    ///
    /// # Panics
    ///
    /// Panics if `s` is not valid UTF-8. Passing invalid UTF-8 is a bug in
    /// the caller; failing loudly is preferable to the undefined behavior an
    /// unchecked conversion would cause.
    #[inline]
    pub(super) fn emit_chars(&mut self, s: &[u8]) {
        let text = std::str::from_utf8(s).expect("emit_chars requires valid UTF-8 input");
        self.emitter.emit_string(text);
    }

    /// Reports `error` to the emitter together with the span it applies to.
    ///
    /// Every span ends at the reader's current position; only the start differs:
    ///
    /// * the EOF errors listed below and the missing-semicolon error get an
    ///   empty span at the current position,
    /// * the character reference errors listed below start at `self.some_offset`,
    /// * every other error starts at `self.position_before_match`.
    #[inline]
    pub(crate) fn emit_error(&mut self, error: Error) {
        let span = match error {
            Error::EofBeforeTagName
            | Error::EofInCdata
            | Error::EofInComment
            | Error::EofInDoctype
            | Error::EofInScriptHtmlCommentLikeText
            | Error::EofInTag
            | Error::MissingSemicolonAfterCharacterReference => {
                self.reader.position()..self.reader.position()
            }
            Error::AbsenceOfDigitsInNumericCharacterReference
            | Error::NullCharacterReference
            | Error::CharacterReferenceOutsideUnicodeRange
            | Error::SurrogateCharacterReference
            | Error::NoncharacterCharacterReference
            | Error::ControlCharacterReference
            | Error::UnknownNamedCharacterReference => self.some_offset..self.reader.position(),

            _ => self.position_before_match..self.reader.position(),
        };
        self.emitter.report_error(error, span);
    }

    /// Assuming the _current token_ is an end tag, return true if all of these hold. Return false otherwise.
    ///
    /// * the _last start tag_ exists
    /// * the current end tag token's name equals to the last start tag's name.
    ///
    /// Note that the implementation only compares the two recorded names; the
    /// existence of a last start tag is not checked separately, so two empty
    /// names would compare equal.
    ///
    /// See also WHATWG's definition of [appropriate end tag token].
    ///
    /// [appropriate end tag token]: https://html.spec.whatwg.org/multipage/parsing.html#appropriate-end-tag-token
    #[inline]
    pub(super) fn current_end_tag_is_appropriate(&mut self) -> bool {
        self.current_tag_name == self.last_start_tag_name
    }

    /// Tells the emitter to begin a start tag and resets the locally tracked
    /// tag name, marking the current tag as a start tag.
    #[inline]
    pub(super) fn init_start_tag(&mut self) {
        self.emitter
            .init_start_tag(self.some_offset, self.position_before_match);
        self.current_tag_name.clear();
        self.is_start_tag = true;
    }

    /// Tells the emitter to begin an end tag and resets the locally tracked
    /// tag name, marking the current tag as an end tag.
    #[inline]
    pub(super) fn init_end_tag(&mut self) {
        self.emitter
            .init_end_tag(self.some_offset, self.position_before_match);
        self.current_tag_name.clear();
        self.is_start_tag = false;
    }

    /// Tells the emitter to begin a doctype token at `self.some_offset`.
    #[inline]
    pub(super) fn init_doctype(&mut self) {
        self.emitter.init_doctype(self.some_offset);
    }

    /// Appends `s` to the tag name, both in the emitter and in the local copy
    /// used for end tag matching and naive state switching.
    #[inline]
    pub(super) fn push_tag_name(&mut self, s: &str) {
        self.emitter.push_tag_name(s);
        self.current_tag_name.push_str(s);
    }

    /// Tells the emitter to emit the current tag, passing the reader's current position.
    ///
    /// For start tags this additionally:
    ///
    /// * switches `self.state` to whatever `naive_next_state` returns for the
    ///   tag name, if `self.naively_switch_state` is set, and
    /// * records the tag name as the last start tag name.
    #[inline]
    pub(super) fn emit_current_tag(&mut self) {
        self.emitter.emit_current_tag(self.reader.position());
        if self.is_start_tag {
            if self.naively_switch_state {
                self.state = naive_next_state(&self.current_tag_name).into();
            }
            // Swapping (rather than cloning) hands the name over without allocating;
            // the stale name left in `current_tag_name` is cleared by the next
            // `init_start_tag`/`init_end_tag`.
            std::mem::swap(&mut self.last_start_tag_name, &mut self.current_tag_name);
        }
    }

    /// Pushes `c` onto the reconsume stack so that the next [`Self::read_char`]
    /// returns it instead of reading from the reader.
    ///
    /// `c` may be `None`: `read_char` pushes the raw result of its lookahead read.
    #[inline]
    pub(super) fn unread_char(&mut self, c: Option<char>) {
        self.to_reconsume.push(c);
    }

    /// Emits an input stream error if `c` is a surrogate, a noncharacter, or a
    /// control character other than NUL and whitespace.
    #[inline]
    fn validate_char(&mut self, c: char) {
        match c as u32 {
            // A Rust `char` can never hold a surrogate code point, so this arm is
            // not expected to match; it is kept for parity with the error list.
            surrogate_pat!() => {
                self.emit_error(Error::SurrogateInInputStream);
            }
            noncharacter_pat!() => {
                self.emit_error(Error::NoncharacterInInputStream);
            }
            // control without whitespace or nul
            x @ control_pat!()
                if !matches!(x, 0x0000 | 0x0009 | 0x000a | 0x000c | 0x000d | 0x0020) =>
            {
                self.emit_error(Error::ControlCharacterInInputStream);
            }
            _ => (),
        }
    }

    /// Returns the next input character, or `Ok(None)` once the input is exhausted.
    ///
    /// Characters pushed back via [`Self::unread_char`] are returned first;
    /// otherwise the character is read from the reader. A carriage return,
    /// optionally followed by a line feed, is normalized to a single `'\n'`.
    /// Only characters freshly read from the reader are validated, so a
    /// reconsumed character does not report its input stream error twice.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the underlying reader.
    pub(super) fn read_char(&mut self) -> Result<Option<char>, R::Error> {
        let (c_res, reconsumed) = match self.to_reconsume.pop() {
            Some(c) => (Ok(c), true),
            None => (self.reader.read_char(), false),
        };

        let mut c = match c_res {
            Ok(Some(c)) => c,
            // Both end of input and reader errors are handed to the caller unchanged.
            res => return res,
        };

        if c == '\r' {
            c = '\n';
            // Look one character ahead: a directly following '\n' belongs to the
            // same newline and is swallowed, anything else is pushed back.
            let c2 = self.reader.read_char()?;
            if c2 != Some('\n') {
                self.unread_char(c2);
            }
        }

        if !reconsumed {
            self.validate_char(c);
        }

        Ok(Some(c))
    }

    /// Tries to consume the string `s` from the input.
    ///
    /// Characters waiting on the reconsume stack are matched first; whatever is
    /// left of `s` is then matched against the reader. If `case_sensitive` is
    /// false, the comparison ignores ASCII case.
    ///
    /// Returns `Ok(true)` if `s` was matched and consumed. Returns `Ok(false)`
    /// otherwise, in which case the reconsume stack is restored to the state it
    /// had on entry so that no pushed-back character is lost.
    /// NOTE(review): this relies on the reader leaving its own position untouched
    /// when it reports a mismatch; confirm against the `Reader` contract.
    ///
    /// `s` must not be empty.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the underlying reader.
    #[inline]
    pub(super) fn try_read_string(
        &mut self,
        mut s: &str,
        case_sensitive: bool,
    ) -> Result<bool, R::Error> {
        debug_assert!(!s.is_empty());

        let to_reconsume_bak = self.to_reconsume;
        let mut chars = s.chars();
        while let Some(c) = self.to_reconsume.pop() {
            if let (Some(x), Some(x2)) = (c, chars.next()) {
                if x == x2 || (!case_sensitive && x.to_ascii_lowercase() == x2.to_ascii_lowercase())
                {
                    // `x` and `x2` are either identical or both ASCII, so they
                    // have the same UTF-8 length.
                    s = &s[x.len_utf8()..];
                    continue;
                }
            }

            self.to_reconsume = to_reconsume_bak;
            return Ok(false);
        }

        if s.is_empty() {
            // The reconsumed characters already covered the whole string;
            // there is nothing left to ask the reader for.
            return Ok(true);
        }

        let matched = self.reader.try_read_string(s, case_sensitive)?;
        if !matched {
            // The prefix matched the reconsumed characters but the remainder did
            // not match the reader: put the reconsumed characters back.
            self.to_reconsume = to_reconsume_bak;
        }
        Ok(matched)
    }

    /// Returns true if the return state is one of the three attribute value
    /// states (double-quoted, single-quoted or unquoted).
    pub(super) fn is_consumed_as_part_of_an_attribute(&self) -> bool {
        matches!(
            self.return_state,
            Some(
                State::AttributeValueDoubleQuoted
                    | State::AttributeValueSingleQuoted
                    | State::AttributeValueUnquoted
            )
        )
    }

    /// Flushes the temporary buffer: its content is appended to the current
    /// attribute value if the return state is an attribute value state, and
    /// emitted as character tokens otherwise. The buffer is cleared either way.
    pub(super) fn flush_code_points_consumed_as_character_reference(&mut self) {
        if self.is_consumed_as_part_of_an_attribute() {
            self.emitter.push_attribute_value(&self.temporary_buffer);
            self.temporary_buffer.clear();
        } else {
            self.flush_buffer_characters();
        }
    }

    /// Emits the content of the temporary buffer as character tokens and clears the buffer.
    pub(super) fn flush_buffer_characters(&mut self) {
        self.emitter.emit_string(&self.temporary_buffer);
        self.temporary_buffer.clear();
    }
}

/// Pattern matching the numeric values of the UTF-16 surrogate code points
/// (U+D800 through U+DFFF, inclusive).
macro_rules! surrogate_pat {
    () => {
        0xD800..=0xDFFF
    };
}

pub(crate) use surrogate_pat;

/// Pattern matching the numeric values of the control code points:
/// U+0000 through U+001F and U+007F through U+009F (both inclusive).
///
/// The expansion is parenthesized so that the macro can be used as a
/// sub-pattern, e.g. `x @ control_pat!()`.
macro_rules! control_pat {
    () => {
        (0x0000..=0x001f | 0x007f..=0x009f)
    };
}

pub(crate) use control_pat;

/// Pattern matching the ASCII decimal digits `'0'` through `'9'`.
macro_rules! ascii_digit_pat {
    () => { '0'..='9' };
}

pub(crate) use ascii_digit_pat;

/// Pattern matching tab (U+0009), line feed (U+000A), form feed (U+000C)
/// and space (U+0020).
///
/// Carriage return is not part of the pattern.
macro_rules! whitespace_pat {
    () => {
        '\u{09}' | '\u{0A}' | '\u{0C}' | '\u{20}'
    };
}

pub(crate) use whitespace_pat;

/// Pattern matching the numeric values of the Unicode noncharacters:
/// U+FDD0 through U+FDEF, plus the last two code points (`xFFFE` and `xFFFF`)
/// of each of the 17 planes.
macro_rules! noncharacter_pat {
    () => {
        0xfdd0..=0xfdef
            | 0xfffe | 0xffff
            | 0x1fffe | 0x1ffff
            | 0x2fffe | 0x2ffff
            | 0x3fffe | 0x3ffff
            | 0x4fffe | 0x4ffff
            | 0x5fffe | 0x5ffff
            | 0x6fffe | 0x6ffff
            | 0x7fffe | 0x7ffff
            | 0x8fffe | 0x8ffff
            | 0x9fffe | 0x9ffff
            | 0xafffe | 0xaffff
            | 0xbfffe | 0xbffff
            | 0xcfffe | 0xcffff
            | 0xdfffe | 0xdffff
            | 0xefffe | 0xeffff
            | 0xffffe | 0xfffff
            | 0x10fffe | 0x10ffff
    };
}

pub(crate) use noncharacter_pat;

// When integration tests are running, this enum is public and we get warnings about missing docs.
// However, it's not actually part of public API.
/// The states the tokenizer state machine can be in.
///
/// NOTE(review): the variant names appear to mirror the tokenizer states of
/// the WHATWG HTML parsing specification — confirm against the spec before
/// relying on a one-to-one correspondence.
#[allow(missing_docs)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum State {
    Data,
    RcData,
    RawText,
    ScriptData,
    PlainText,
    TagOpen,
    EndTagOpen,
    TagName,
    RcDataLessThanSign,
    RcDataEndTagOpen,
    RcDataEndTagName,
    RawTextLessThanSign,
    RawTextEndTagOpen,
    RawTextEndTagName,
    ScriptDataLessThanSign,
    ScriptDataEndTagOpen,
    ScriptDataEndTagName,
    ScriptDataEscapeStart,
    ScriptDataEscapeStartDash,
    ScriptDataEscaped,
    ScriptDataEscapedDash,
    ScriptDataEscapedDashDash,
    ScriptDataEscapedLessThanSign,
    ScriptDataEscapedEndTagOpen,
    ScriptDataEscapedEndTagName,
    ScriptDataDoubleEscapeStart,
    ScriptDataDoubleEscaped,
    ScriptDataDoubleEscapedDash,
    ScriptDataDoubleEscapedDashDash,
    ScriptDataDoubleEscapedLessThanSign,
    ScriptDataDoubleEscapeEnd,
    BeforeAttributeName,
    AttributeName,
    AfterAttributeName,
    BeforeAttributeValue,
    // The three attribute value states below are the ones checked by
    // `is_consumed_as_part_of_an_attribute`.
    AttributeValueDoubleQuoted,
    AttributeValueSingleQuoted,
    AttributeValueUnquoted,
    AfterAttributeValueQuoted,
    SelfClosingStartTag,
    BogusComment,
    MarkupDeclarationOpen,
    CommentStart,
    CommentStartDash,
    Comment,
    CommentLessThanSign,
    CommentLessThanSignBang,
    CommentLessThanSignBangDash,
    CommentLessThanSignBangDashDash,
    CommentEndDash,
    CommentEnd,
    CommentEndBang,
    Doctype,
    BeforeDoctypeName,
    DoctypeName,
    AfterDoctypeName,
    AfterDoctypePublicKeyword,
    BeforeDoctypePublicIdentifier,
    DoctypePublicIdentifierDoubleQuoted,
    DoctypePublicIdentifierSingleQuoted,
    AfterDoctypePublicIdentifier,
    BetweenDoctypePublicAndSystemIdentifiers,
    AfterDoctypeSystemKeyword,
    BeforeDoctypeSystemIdentifier,
    DoctypeSystemIdentifierDoubleQuoted,
    DoctypeSystemIdentifierSingleQuoted,
    AfterDoctypeSystemIdentifier,
    BogusDoctype,
    CdataSection,
    CdataSectionBracket,
    CdataSectionEnd,
    CharacterReference,
    NamedCharacterReference,
    AmbiguousAmpersand,
    NumericCharacterReference,
    HexadecimalCharacterReferenceStart,
    DecimalCharacterReferenceStart,
    HexadecimalCharacterReference,
    DecimalCharacterReference,
    NumericCharacterReferenceEnd,
}

/// Encodes a character as UTF-8 into a temporary 4-byte stack buffer and
/// evaluates to a `&str` borrowing that buffer.
///
/// The buffer is a temporary, so the resulting `&str` is only usable within
/// the statement containing the macro invocation (e.g. as a call argument).
macro_rules! ctostr {
    ($c:expr) => {
        &*$c.encode_utf8(&mut [0u8; 4])
    };
}

pub(crate) use ctostr;