pub(crate) mod machine;

use crate::offset::{Offset, Position};
use crate::reader::{IntoReader, Reader};
use crate::Emitter;
use machine::ControlToken;

#[cfg(feature = "integration-tests")]
pub use machine::State as InternalState;

/// An HTML tokenizer.
///
/// The type parameters are the reader `R` supplying the input, the offset
/// type `O` used for positions and the emitter `E` that receives what the
/// state machine produces.
///
/// # Warning
///
/// Iterating over the tokenizer directly without calling [`Tokenizer::set_state`]
/// results in wrong state transitions:
///
/// ```
/// # use html5tokenizer::{BasicEmitter, Event, Tokenizer, Token};
/// let emitter = BasicEmitter::default();
/// let html = "<script><b>";
/// let mut tokens = Tokenizer::new(html, emitter).flatten();
/// assert!(matches!(tokens.next(), Some(Event::Token(Token::StartTag(_)))));
/// assert!(matches!(tokens.next(), Some(Event::Token(Token::StartTag(_)))));
/// ```
///
/// Instead use the [`NaiveParser`] (in the future this crate will also provide a proper implementation of [tree construction]).
///
/// [`NaiveParser`]: crate::NaiveParser
/// [tree construction]: https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
pub struct Tokenizer<R, O, E> {
    /// The underlying state machine; it owns the reader and the emitter
    /// passed to [`Tokenizer::new`] and tracks the current state.
    machine: machine::Machine<R, O, E>,
    /// Set once the state machine has reported the end of the input.
    /// From then on the iterator only drains tokens still held by the
    /// emitter and never steps the state machine again.
    eof: bool,
}

impl<R, O, E> Tokenizer<R, O, E>
where
    R: Reader + Position<O>,
    O: Offset,
    E: Emitter<O>,
{
    /// Builds a tokenizer that reads from `reader` and reports to `emitter`.
    ///
    /// The tokenizer alone does not parse HTML correctly: you additionally
    /// have to implement [tree construction] and switch states through
    /// [`Tokenizer::set_state`] as that algorithm demands.
    ///
    /// [tree construction]: https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
    pub fn new<'a>(reader: impl IntoReader<'a, Reader = R>, emitter: E) -> Self {
        let machine = machine::Machine::new(reader.into_reader(), emitter);
        Self {
            machine,
            eof: false,
        }
    }

    /// Must be called after the iterator has yielded [`Event::CdataOpen`].
    ///
    /// To be spec-compliant, pass `true` when an _adjusted current node_
    /// exists and is not an element in the HTML namespace, and `false` in
    /// every other case (this is the third condition listed under
    /// [Markup declaration open state]).
    ///
    /// [Markup declaration open state]: https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
    pub fn handle_cdata_open(
        &mut self,
        adjusted_current_node_present_and_not_in_html_namespace: bool,
    ) {
        let machine = &mut self.machine;
        machine::handle_cdata_open(
            machine,
            adjusted_current_node_present_and_not_in_html_namespace,
        );
    }

    /// Gives mutable access to the emitter owned by the tokenizer.
    pub fn emitter_mut(&mut self) -> &mut E {
        let Self { machine, .. } = self;
        &mut machine.emitter
    }
}

/// An event yielded by the [`Iterator`] implementation for the [`Tokenizer`].
///
/// Events can be compared with `==` (and therefore used in `assert_eq!`)
/// whenever the token type `T` supports comparison itself.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Event<T> {
    /// A token emitted by the [`Emitter`].
    Token(T),
    /// The state machine encountered `<![CDATA[`. You must call [`Tokenizer::handle_cdata_open`],
    /// before advancing the tokenizer iterator again.
    CdataOpen,
}

/// The states you can set the tokenizer to.
///
/// This is a plain, fieldless enum, so values are cheap to copy and can be
/// compared for equality.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum State {
    /// The [data state].
    ///
    /// [data state]: https://html.spec.whatwg.org/multipage/parsing.html#data-state
    Data,
    /// The [PLAINTEXT state].
    ///
    /// [PLAINTEXT state]: https://html.spec.whatwg.org/multipage/parsing.html#plaintext-state
    Plaintext,
    /// The [RCDATA state].
    ///
    /// [RCDATA state]: https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
    Rcdata,
    /// The [RAWTEXT state].
    ///
    /// [RAWTEXT state]: https://html.spec.whatwg.org/multipage/parsing.html#rawtext-state
    Rawtext,
    /// The [script data state].
    ///
    /// [script data state]: https://html.spec.whatwg.org/multipage/parsing.html#script-data-state
    ScriptData,
    /// The [script data escaped state].
    ///
    /// [script data escaped state]: https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-state
    ScriptDataEscaped,
    /// The [script data double escaped state].
    ///
    /// [script data double escaped state]: https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-state
    ScriptDataDoubleEscaped,
}

/// Maps each publicly settable state onto the identically named state of
/// the internal state machine.
impl From<State> for machine::State {
    fn from(public: State) -> Self {
        // No wildcard arm on purpose: adding a public variant must force
        // this mapping to be extended.
        match public {
            State::Data => Self::Data,
            State::Plaintext => Self::Plaintext,
            State::Rcdata => Self::Rcdata,
            State::Rawtext => Self::Rawtext,
            State::ScriptData => Self::ScriptData,
            State::ScriptDataEscaped => Self::ScriptDataEscaped,
            State::ScriptDataDoubleEscaped => Self::ScriptDataDoubleEscaped,
        }
    }
}

impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
    /// Test-internal function to override internal state.
    ///
    /// Only available with the `integration-tests` feature which is not public API.
    #[cfg(feature = "integration-tests")]
    pub fn set_internal_state(&mut self, state: InternalState) {
        self.machine.state = state;
    }

    /// Set the statemachine to start/continue in the given state.
    pub fn set_state(&mut self, state: State) {
        self.machine.state = state.into();
    }
}

impl<O: Offset, R: Reader + Position<O>, E: Emitter<O> + Iterator> Iterator
    for Tokenizer<R, O, E>
{
    type Item = Result<Event<E::Item>, R::Error>;

    /// Advances the tokenizer until there is something to report.
    ///
    /// Tokens already held by the emitter are handed out first. Otherwise
    /// the state machine is stepped until it produces a token, hits
    /// `<![CDATA[`, fails with a reader error, or reaches the end of input.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Drain the emitter before touching the state machine again.
            match self.machine.emitter.next() {
                Some(token) => break Some(Ok(Event::Token(token))),
                // Input exhausted and nothing left to hand out.
                None if self.eof => break None,
                None => {}
            }

            let control = match machine::consume(&mut self.machine) {
                Ok(control) => control,
                Err(error) => break Some(Err(error)),
            };

            match control {
                ControlToken::Continue => {}
                ControlToken::CdataOpen => break Some(Ok(Event::CdataOpen)),
                ControlToken::Eof => {
                    // Remember the end of input, then let the emitter react to
                    // it; whatever it emits is picked up on the next pass.
                    self.eof = true;
                    let position = self.machine.reader_position();
                    self.machine.emitter.emit_eof(position);
                }
            }
        }
    }
}

impl<R, O, E> Tokenizer<R, O, E>
where
    R: Reader,
    E: Emitter<O>,
{
    /// Turns on the state machine's `naively_switch_state` flag.
    pub(crate) fn enable_naive_state_switching(&mut self) {
        let machine = &mut self.machine;
        machine.naively_switch_state = true;
    }

    /// Test-only hook that replaces the state machine's recorded last start
    /// tag name with `last_start_tag`.
    ///
    /// Only available with the `integration-tests` feature which is not public API.
    #[cfg(feature = "integration-tests")]
    pub fn set_last_start_tag(&mut self, last_start_tag: &str) {
        let name = &mut self.machine.last_start_tag_name;
        name.clear();
        name.push_str(last_start_tag);
    }
}