use std::collections::btree_map::Entry;
use std::collections::BTreeSet;
use std::collections::VecDeque;
use std::mem;
use std::ops::Range;

use crate::attr::AttrValueSyntax;
use crate::offset::NoopOffset;
use crate::offset::Offset;
use crate::Error;

/// An emitter is an object providing methods to the tokenizer to produce ("emit") tokens.
///
/// Domain-specific applications of the HTML tokenizer can manually implement this trait to
/// customize per-token allocations, or avoid them altogether.
///
/// An emitter is assumed to have these internal states:
///
/// * _current token_: Can be a tag, doctype or comment token. There's only one current token.
/// * _current attribute_: The currently processed HTML attribute, consisting of two strings for name and value.
///
/// The following methods describe the behavior the WHATWG spec expects, but that
/// doesn't mean you need to follow it. For example:
///
/// * If your usage of the tokenizer will ignore all errors, none of the error handling and
///   validation requirements apply to you. You can implement `emit_error` as a noop and omit all
///   checks that would emit errors.
///
/// * If you don't care about attributes at all, you can make all related methods a noop.
pub trait Emitter<O> {
    /// The token type emitted by this emitter. This controls what type of values the [`Tokenizer`](crate::Tokenizer)
    /// yields when used as an iterator.
    type Token;

    /// The state machine has reached the end of the file. It will soon call `pop_token` for the
    /// last time.
    fn emit_eof(&mut self);

    /// A (probably recoverable) parsing error has occurred.
    fn emit_error(&mut self, error: Error, offset: O);

    /// After every state change, the tokenizer calls this method to retrieve a new token that can
    /// be returned via the tokenizer's iterator interface.
    fn pop_token(&mut self) -> Option<Self::Token>;

    /// Emit a bunch of plain characters as character tokens.
    fn emit_string(&mut self, c: &str);

    /// Set the _current token_ to a start tag.
    fn init_start_tag(&mut self, offset: O);

    /// Set the _current token_ to an end tag.
    fn init_end_tag(&mut self, offset: O);

    /// Set the _current token_ to a comment.
    fn init_comment(&mut self, data_offset: O);

    /// Emit the _current token_, assuming it is a tag.
    ///
    /// Also get the current attribute and append it to the to-be-emitted tag. See docstring for
    /// [`Emitter::init_attribute_name`] for how duplicates should be handled.
    ///
    /// If an end tag is emitted with attributes, an [`Error::EndTagWithAttributes`]
    /// error should be emitted.
    ///
    /// If the current token is not a start/end tag, this method may panic.
    fn emit_current_tag(&mut self, offset: O);

    /// Emit the _current token_, assuming it is a comment.
    ///
    /// If the current token is not a comment, this method may panic.
    fn emit_current_comment(&mut self, offset: O);

    /// Emit the _current token_, assuming it is a doctype.
    ///
    /// If the current token is not a doctype, this method may panic.
    fn emit_current_doctype(&mut self, offset: O);

    /// Assuming the _current token_ is a start tag, set its self-closing flag.
    ///
    /// If the current token is an end tag, the emitter should instead emit the
    /// [`Error::EndTagWithTrailingSolidus`] error.
    ///
    /// If the current token is not a start or end tag, this method may panic.
    fn set_self_closing(&mut self, offset: O);

    /// Assuming the _current token_ is a doctype, set its "force quirks" flag to true.
    ///
    /// If the current token is not a doctype, this method may panic.
    fn set_force_quirks(&mut self);

    /// Assuming the _current token_ is a start/end tag, append a string to the current tag's name.
    ///
    /// If the current token is not a start or end tag, this method may panic.
    fn push_tag_name(&mut self, s: &str);

    /// Assuming the _current token_ is a comment, append a string to the comment's contents.
    ///
    /// If the current token is not a comment, this method may panic.
    fn push_comment(&mut self, s: &str);

    /// Assuming the _current token_ is a doctype, append a string to the doctype's name.
    ///
    /// If the current token is not a doctype, this method may panic.
    fn push_doctype_name(&mut self, s: &str);

    /// Set the _current token_ to a new doctype token:
    ///
    /// * the name should be empty
    /// * the "public identifier" should be null (different from empty)
    /// * the "system identifier" should be null (different from empty)
    /// * the "force quirks" flag should be `false`
    fn init_doctype(&mut self, offset: O);

    /// Set the _current attribute_ to a new one, starting with empty name and value strings.
    ///
    /// The old attribute, if any, should be put on the _current token_. If an attribute with that
    /// name already exists, WHATWG says the new one should be ignored and a
    /// [`Error::DuplicateAttribute`] error should be emitted.
    ///
    /// If the current token is not a tag, this method may panic.
    fn init_attribute_name(&mut self, offset: O);

    /// Called before the first `push_attribute_value` call.
    ///
    /// The default implementation does nothing, so emitters that don't track the value syntax
    /// or the value offset don't have to implement it.
    ///
    /// If there is no current attribute, this method may panic.
    #[allow(unused_variables)]
    fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {}

    /// Append a string to the current attribute's name.
    ///
    /// If there is no current attribute, this method may panic.
    fn push_attribute_name(&mut self, s: &str);

    /// Append a string to the current attribute's value.
    ///
    /// If there is no current attribute, this method may panic.
    fn push_attribute_value(&mut self, s: &str);

    /// Assuming the _current token_ is a doctype, set its "public identifier" to the empty string.
    ///
    /// If the current token is not a doctype, this method may panic.
    fn init_doctype_public_id(&mut self, offset: O);

    /// Assuming the _current token_ is a doctype, set its "system identifier" to the empty string.
    ///
    /// If the current token is not a doctype, this method may panic.
    fn init_doctype_system_id(&mut self, offset: O);

    /// Assuming the _current token_ is a doctype, append the given string to its "public identifier".
    ///
    /// If the current token is not a doctype, this method may panic.
    fn push_doctype_public_id(&mut self, s: &str);

    /// Assuming the _current token_ is a doctype, append the given string to its "system identifier".
    ///
    /// If the current token is not a doctype, this method may panic.
    fn push_doctype_system_id(&mut self, s: &str);
}

/// The default implementation of [`Emitter`], used to produce tokens.
///
/// Finished tokens are collected in an internal queue and handed out one at a
/// time via [`Emitter::pop_token`].
///
/// # Warning
///
/// * Using the DefaultEmitter without calling [`Tokenizer::set_state`]
///   results in wrong state transitions:
///
///   ```
///   # use html5tokenizer::{DefaultEmitter, Tokenizer, Token};
///   let emitter = DefaultEmitter::default();
///   let html = "<script><b>";
///   let mut tokens = Tokenizer::new(html, emitter).flatten();
///   assert!(matches!(tokens.next().unwrap(), Token::StartTag(tag) if tag.name == "script"));
///   assert!(matches!(tokens.next().unwrap(), Token::StartTag(tag) if tag.name == "b"));
///   ```
///
/// [`Tokenizer::set_state`]: crate::Tokenizer::set_state
pub struct DefaultEmitter<O = NoopOffset> {
    /// Character data accumulated by `emit_string` that has not yet been
    /// turned into a [`Token::String`].
    current_characters: String,
    /// The tag, comment or doctype that is currently being built, if any.
    current_token: Option<Token<O>>,
    /// Name and internal data of the attribute that is currently being built, if any.
    current_attribute: Option<(String, crate::attr::AttrInternal<O>)>,
    /// Names of the attributes seen so far on the current end tag. End tags
    /// don't store attributes, so this set is what duplicate detection and the
    /// [`Error::EndTagWithAttributes`] check rely on.
    seen_attributes: BTreeSet<String>,
    /// Queue of finished tokens: new tokens are pushed to the front and
    /// `pop_token` pops from the back, so tokens leave in emission order.
    emitted_tokens: VecDeque<Token<O>>,
    /// Name span of the attribute most recently flushed onto an end tag; used
    /// as the span of the [`Error::EndTagWithAttributes`] error.
    attr_in_end_tag_span: Option<Range<O>>,
}

impl<O> Default for DefaultEmitter<O> {
    fn default() -> Self {
        DefaultEmitter {
            current_characters: String::new(),
            current_token: None,
            current_attribute: None,
            seen_attributes: BTreeSet::new(),
            emitted_tokens: VecDeque::new(),
            attr_in_end_tag_span: None,
        }
    }
}

impl<O> DefaultEmitter<O> {
    fn emit_token(&mut self, token: Token<O>) {
        self.flush_current_characters();
        self.emitted_tokens.push_front(token);
    }

    fn flush_current_attribute(&mut self)
    where
        O: Offset,
    {
        if let Some((name, map_val)) = self.current_attribute.take() {
            match self.current_token {
                Some(Token::StartTag(ref mut tag)) => match tag.attributes.inner.entry(name) {
                    Entry::Vacant(vacant) => {
                        vacant.insert(map_val);
                    }
                    Entry::Occupied(entry) => {
                        let name_len = entry.key().len();
                        self.push_error(Error::DuplicateAttribute, map_val.name_span(name_len));
                    }
                },
                Some(Token::EndTag(_)) => {
                    let name_span = map_val.name_span(name.len());
                    self.attr_in_end_tag_span = Some(name_span.clone());
                    if !self.seen_attributes.insert(name) {
                        self.push_error(Error::DuplicateAttribute, name_span);
                    }
                }
                _ => {
                    debug_assert!(false);
                }
            }
        }
    }

    fn flush_current_characters(&mut self) {
        if self.current_characters.is_empty() {
            return;
        }

        let s = mem::take(&mut self.current_characters);
        self.emit_token(Token::String(s));
    }

    fn push_error(&mut self, error: Error, span: Range<O>) {
        // bypass character flushing in self.emit_token: we don't need the error location to be
        // that exact
        self.emitted_tokens.push_front(Token::Error { error, span });
    }
}

impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
    type Token = Token<O>;

    fn emit_eof(&mut self) {
        self.flush_current_characters();
    }

    fn emit_error(&mut self, error: Error, offset: O) {
        self.push_error(error, offset..offset);
    }

    fn pop_token(&mut self) -> Option<Self::Token> {
        self.emitted_tokens.pop_back()
    }

    fn emit_string(&mut self, s: &str) {
        self.current_characters.push_str(s);
    }

    fn init_start_tag(&mut self, offset: O) {
        self.current_token = Some(Token::StartTag(StartTag {
            span: offset - b"<".len()..offset - b"<".len(),
            self_closing: false,
            name: String::new(),
            attributes: Default::default(),
        }));
    }
    fn init_end_tag(&mut self, offset: O) {
        self.current_token = Some(Token::EndTag(EndTag {
            span: offset - b"</".len()..offset - b"</".len(),
            name: String::new(),
        }));
        self.seen_attributes.clear();
    }

    fn init_comment(&mut self, data_offset: O) {
        self.current_token = Some(Token::Comment(Comment {
            data: String::new(),
            data_offset,
        }));
    }
    fn emit_current_tag(&mut self, offset: O) {
        self.flush_current_attribute();
        let mut token = self.current_token.take().unwrap();
        match &mut token {
            Token::EndTag(tag) => {
                if !self.seen_attributes.is_empty() {
                    let span = self.attr_in_end_tag_span.take().unwrap();
                    self.push_error(Error::EndTagWithAttributes, span);
                }
                self.seen_attributes.clear();
                tag.span.end = offset + b">".len();
            }
            Token::StartTag(tag) => {
                tag.span.end = offset + b">".len();
            }
            _ => debug_assert!(false),
        }
        self.emit_token(token);
    }
    fn emit_current_comment(&mut self, _offset: O) {
        let comment = self.current_token.take().unwrap();
        debug_assert!(matches!(comment, Token::Comment(_)));
        self.emit_token(comment);
    }

    fn emit_current_doctype(&mut self, offset: O) {
        let Some(Token::Doctype(mut doctype)) = self.current_token.take() else {
            debug_assert!(false);
            return;
        };
        doctype.span.end = offset;
        self.emit_token(Token::Doctype(doctype));
    }

    fn set_self_closing(&mut self, offset: O) {
        let tag = self.current_token.as_mut().unwrap();
        match tag {
            Token::StartTag(StartTag {
                ref mut self_closing,
                ..
            }) => {
                *self_closing = true;
            }
            Token::EndTag(_) => {
                self.emit_error(Error::EndTagWithTrailingSolidus, offset - 1);
            }
            _ => {
                debug_assert!(false);
            }
        }
    }
    fn set_force_quirks(&mut self) {
        match self.current_token {
            Some(Token::Doctype(ref mut doctype)) => doctype.force_quirks = true,
            _ => debug_assert!(false),
        }
    }
    fn push_tag_name(&mut self, s: &str) {
        match self.current_token {
            Some(Token::StartTag(StartTag { ref mut name, .. })) => {
                name.push_str(s);
            }
            Some(Token::EndTag(EndTag { ref mut name, .. })) => {
                name.push_str(s);
            }
            _ => debug_assert!(false),
        }
    }

    fn push_comment(&mut self, s: &str) {
        match self.current_token {
            Some(Token::Comment(Comment { ref mut data, .. })) => data.push_str(s),
            _ => debug_assert!(false),
        }
    }

    fn push_doctype_name(&mut self, s: &str) {
        match self.current_token {
            Some(Token::Doctype(ref mut doctype)) => doctype.name.push_str(s),
            _ => debug_assert!(false),
        }
    }
    fn init_doctype(&mut self, offset: O) {
        self.current_token = Some(Token::Doctype(Doctype {
            name: String::new(),
            force_quirks: false,
            public_id: None,
            system_id: None,
            span: offset..O::default(),
            public_id_offset: O::default(),
            system_id_offset: O::default(),
        }));
    }

    fn init_attribute_name(&mut self, offset: O) {
        self.flush_current_attribute();
        self.current_attribute = Some((
            String::new(),
            crate::attr::AttrInternal {
                name_offset: offset,
                value: String::new(),
                value_offset: O::default(),
                value_syntax: None,
            },
        ));
    }
    fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {
        let (_, current_attribute) = self.current_attribute.as_mut().unwrap();
        current_attribute.value_offset = offset;
        current_attribute.value_syntax = Some(syntax);
    }

    fn push_attribute_name(&mut self, s: &str) {
        let current_attr = self.current_attribute.as_mut().unwrap();
        current_attr.0.push_str(s);
    }
    fn push_attribute_value(&mut self, s: &str) {
        let current_attr = self.current_attribute.as_mut().unwrap();
        current_attr.1.value.push_str(s);
    }
    fn init_doctype_public_id(&mut self, offset: O) {
        let Some(Token::Doctype(doctype)) = &mut self.current_token else {
            debug_assert!(false);
            return;
        };
        doctype.public_id = Some("".to_owned());
        doctype.public_id_offset = offset;
    }
    fn init_doctype_system_id(&mut self, offset: O) {
        let Some(Token::Doctype(doctype)) = &mut self.current_token else {
            debug_assert!(false);
            return;
        };
        doctype.system_id = Some("".to_owned());
        doctype.system_id_offset = offset;
    }
    fn push_doctype_public_id(&mut self, s: &str) {
        if let Some(Token::Doctype(Doctype {
            public_id: Some(ref mut id),
            ..
        })) = self.current_token
        {
            id.push_str(s);
        } else {
            debug_assert!(false);
        }
    }
    fn push_doctype_system_id(&mut self, s: &str) {
        if let Some(Token::Doctype(Doctype {
            system_id: Some(ref mut id),
            ..
        })) = self.current_token
        {
            id.push_str(s);
        } else {
            debug_assert!(false);
        }
    }
}

/// An HTML start tag, such as `<p>` or `<a>`.
#[derive(Debug, Eq, PartialEq)]
pub struct StartTag<O> {
    /// Whether this tag is self-closing. If it is self-closing, no following [`EndTag`] should be
    /// expected.
    pub self_closing: bool,

    /// The start tag's name, such as `"p"` or `"a"`.
    pub name: String,

    /// A mapping for any HTML attributes this start tag may have.
    ///
    /// Duplicate attributes are ignored after the first one as per WHATWG spec. Implement your own
    /// [`Emitter`] to tweak this behavior.
    pub attributes: crate::attr::AttributeMap<O>,

    /// The source code span of the tag.
    ///
    /// The tag name is expected to start one `<` after the start of this span
    /// (see [`StartTag::name_span`]).
    pub span: Range<O>,
}

impl<O: Offset> StartTag<O> {
    /// Returns the span of the tag name.
    ///
    /// The span is derived from the tag's own span: it begins right after the
    /// leading `<` and is as long as the name.
    pub fn name_span(&self) -> Range<O> {
        let name_start = self.span.start + b"<".len();
        let name_end = name_start + self.name.len();
        name_start..name_end
    }
}

/// An HTML end/close tag, such as `</p>` or `</a>`.
#[derive(Debug, Eq, PartialEq)]
pub struct EndTag<O> {
    /// The ending tag's name, such as `"p"` or `"a"`.
    pub name: String,

    /// The source code span of the tag.
    ///
    /// The tag name is expected to start one `</` after the start of this span
    /// (see [`EndTag::name_span`]).
    pub span: Range<O>,
}

impl<O: Offset> EndTag<O> {
    /// Returns the span of the tag name.
    ///
    /// The span is derived from the tag's own span: it begins right after the
    /// leading `</` and is as long as the name.
    pub fn name_span(&self) -> Range<O> {
        let name_start = self.span.start + b"</".len();
        let name_end = name_start + self.name.len();
        name_start..name_end
    }
}

/// An HTML comment.
#[derive(PartialEq, Eq, Debug)]
pub struct Comment<O> {
    /// The text within the comment.
    pub data: String,
    /// The source offset at which the comment data starts.
    pub data_offset: O,
}

impl<O: Offset> Comment<O> {
    /// Returns the span of the comment data: it starts at `data_offset` and is
    /// as long as `data`.
    pub fn data_span(&self) -> Range<O> {
        let data_start = self.data_offset;
        let data_end = self.data_offset + self.data.len();
        data_start..data_end
    }
}

/// A doctype. Some examples:
///
/// * `<!DOCTYPE {name}>`
/// * `<!DOCTYPE {name} PUBLIC '{public_id}'>`
/// * `<!DOCTYPE {name} SYSTEM '{system_id}'>`
/// * `<!DOCTYPE {name} PUBLIC '{public_id}' '{system_id}'>`
#[derive(Debug, Eq, PartialEq)]
pub struct Doctype<O> {
    /// The [force-quirks flag].
    ///
    /// [force-quirks flag]: https://html.spec.whatwg.org/#force-quirks-flag
    pub force_quirks: bool,

    /// The doctype's name. For HTML documents this is "html".
    pub name: String,

    /// The doctype's public identifier.
    ///
    /// `None` means the identifier is missing, which is different from an
    /// empty identifier (`Some("")`).
    pub public_id: Option<String>,

    /// The doctype's system identifier.
    ///
    /// `None` means the identifier is missing, which is different from an
    /// empty identifier (`Some("")`).
    pub system_id: Option<String>,

    /// The source code span of the doctype.
    pub span: Range<O>,

    /// The source offset of the public identifier.
    ///
    /// Only meaningful while `public_id` is `Some`.
    public_id_offset: O,

    /// The source offset of the system identifier.
    ///
    /// Only meaningful while `system_id` is `Some`.
    system_id_offset: O,
}

impl<O: Offset> Doctype<O> {
    /// Returns the span of the public identifier, or `None` if the doctype
    /// has no public identifier.
    pub fn public_id_span(&self) -> Option<Range<O>> {
        self.public_id.as_ref().map(|id| {
            let id_start = self.public_id_offset;
            id_start..id_start + id.len()
        })
    }

    /// Returns the span of the system identifier, or `None` if the doctype
    /// has no system identifier.
    pub fn system_id_span(&self) -> Option<Range<O>> {
        self.system_id.as_ref().map(|id| {
            let id_start = self.system_id_offset;
            id_start..id_start + id.len()
        })
    }
}

/// The token type used by default. You can define your own token type by implementing the
/// [`Emitter`] trait.
#[derive(Debug, Eq, PartialEq)]
pub enum Token<O> {
    /// An HTML start tag.
    StartTag(StartTag<O>),
    /// An HTML end tag.
    EndTag(EndTag<O>),
    /// A literal string.
    String(String),
    /// An HTML comment.
    Comment(Comment<O>),
    /// An HTML doctype declaration.
    Doctype(Doctype<O>),
    /// An HTML parsing error.
    ///
    /// Can be skipped over: the tokenizer is supposed to recover from the error and continue
    /// with more tokens afterward.
    Error {
        /// What kind of error occurred.
        error: Error,
        /// The source code span of the error.
        span: Range<O>,
    },
}

/// Most of the [`DefaultEmitter`] test coverage comes from the html5lib-tests
/// that are run by the html5lib integration test. This module only covers
/// details that the html5lib test data doesn't exercise.
#[cfg(test)]
mod tests {
    use super::{DefaultEmitter, Token};
    use crate::{attr::AttrValueSyntax, Tokenizer};

    /// Every way of writing an attribute value is reported with the matching
    /// [`AttrValueSyntax`]; an attribute without a value reports `None`.
    #[test]
    fn test_attribute_value_syntax() {
        let html = "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">";
        let first_token = Tokenizer::new(html, DefaultEmitter::default())
            .flatten()
            .next()
            .unwrap();
        let Token::StartTag(start_tag) = first_token else {
            panic!("expected start tag");
        };

        let expectations = [
            ("empty", None),
            ("unquoted", Some(AttrValueSyntax::Unquoted)),
            ("single-quoted", Some(AttrValueSyntax::SingleQuoted)),
            ("double-quoted", Some(AttrValueSyntax::DoubleQuoted)),
        ];
        for (attr_name, expected_syntax) in expectations {
            let attr = start_tag.attributes.get(attr_name).unwrap();
            assert_eq!(attr.value_syntax(), expected_syntax);
        }
    }
}