use serde::{de, Deserialize};
use std::collections::BTreeMap;

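/// Parse an html5lib-tests tokenizer test file from `reader`, undoing the
/// `doubleEscaped` encoding so callers always see literal input and expected
/// output.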
pub fn parse_tests(
    reader: impl std::io::Read,
) -> Result<impl Iterator<Item = Test>, serde_json::Error> {
    let Tests { tests } = serde_json::from_reader(reader)?;
    Ok(tests.into_iter().map(undo_double_escaping))
}

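/// The tokenizer state a test starts in, spelled the way the `initialStates`
/// field of html5lib-tests spells it.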
#[derive(Debug, Deserialize)]
pub enum InitialState {
    #[serde(rename = "Data state")]
    Data,
    #[serde(rename = "PLAINTEXT state")]
    PlainText,
    #[serde(rename = "RCDATA state")]
    RcData,
    #[serde(rename = "RAWTEXT state")]
    RawText,
    #[serde(rename = "Script data state")]
    ScriptData,
    #[serde(rename = "CDATA section state")]
    CdataSection,
}

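/// Tests that don't specify `initialStates` start in the Data state.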
fn initial_states_default() -> Vec<InitialState> {
    vec![InitialState::Data]
}

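/// A single tokenizer test case.
///
/// `double_escaped` is deliberately private: `parse_tests` resolves it via
/// `undo_double_escaping` before handing tests to callers.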
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Test {
    pub description: String,
    pub input: String,
    #[serde(default = "initial_states_default")]
    pub initial_states: Vec<InitialState>,
    #[serde(flatten)]
    pub output: Output,
    #[serde(default)]
    double_escaped: bool,
    #[serde(default)]
    pub last_start_tag: Option<String>,
}

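/// The expected outcome of tokenizing `input`: the emitted tokens and any
/// parse errors.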
#[derive(Deserialize, PartialEq, Eq, Debug)]
pub struct Output {
    #[serde(default)]
    pub errors: Vec<Error>,
    #[serde(rename = "output")]
    pub tokens: Vec<Token>,
}

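/// A token as html5lib-tests serializes it: a JSON array whose first element
/// names the token type, e.g. `["Character", "abc"]` or
/// `["StartTag", "br", {}, true]` for a self-closing start tag. That array
/// shape is why `Token` gets the hand-written `Deserialize` impl below.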
#[derive(Debug, PartialEq, Eq)]
pub enum Token {
    Doctype {
        name: Option<String>,
        public_id: Option<String>,
        system_id: Option<String>,
        force_quirks: bool,
    },
    StartTag {
        name: String,
        attributes: BTreeMap<String, String>,
        self_closing: bool,
    },
    EndTag {
        name: String,
    },
    Comment(String),
    Character(String),
}

impl<'de> Deserialize<'de> for Token {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        deserializer.deserialize_seq(TokenVisitor)
    }
}

#[derive(Deserialize)]
enum TokenType {
    #[serde(rename = "DOCTYPE")]
    Doctype,
    StartTag,
    EndTag,
    Comment,
    Character,
}

struct TokenVisitor;

impl<'de> de::Visitor<'de> for TokenVisitor {
    type Value = Token;

    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        formatter.write_str("an array describing a token")
    }

    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
    where
        A: serde::de::SeqAccess<'de>,
    {
        let typ: TokenType = seq.next_element()?.ok_or_else(|| {
            de::Error::custom(
                r#"expected first array element to be one of "DOCTYPE", "StartTag", "EndTag", "Comment" or "Character""#,
            )
        })?;

        Ok(match typ {
            TokenType::Doctype => Token::Doctype {
                name: seq
                    .next_element()?
                    .ok_or_else(|| de::Error::missing_field("name"))?,
                public_id: seq
                    .next_element()?
                    .ok_or_else(|| de::Error::missing_field("public_id"))?,
                system_id: seq
                    .next_element()?
                    .ok_or_else(|| de::Error::missing_field("system_id"))?,
                // The JSON stores "correctness", which is the negation of
                // force-quirks.
                force_quirks: !seq
                    .next_element()?
                    .ok_or_else(|| de::Error::missing_field("correctness"))?,
            },
            TokenType::StartTag => Token::StartTag {
                name: seq
                    .next_element()?
                    .ok_or_else(|| de::Error::missing_field("name"))?,
                attributes: seq
                    .next_element()?
                    .ok_or_else(|| de::Error::missing_field("attributes"))?,
                // The self-closing flag is optional and defaults to false.
                self_closing: seq.next_element()?.unwrap_or_default(),
            },
            TokenType::EndTag => Token::EndTag {
                name: seq
                    .next_element()?
                    .ok_or_else(|| de::Error::missing_field("name"))?,
            },
            TokenType::Comment => Token::Comment(
                seq.next_element()?
                    .ok_or_else(|| de::Error::missing_field("data"))?,
            ),
            TokenType::Character => Token::Character(
                seq.next_element()?
                    .ok_or_else(|| de::Error::missing_field("data"))?,
            ),
        })
    }
}

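/// An expected parse error, identified by its error code.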
#[derive(Deserialize, Debug, Eq, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct Error {
    pub code: String,
    // TODO: lineno and column?
}

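/// The top-level shape of a test file: a single `tests` array.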
#[derive(Deserialize)]
struct Tests {
    tests: Vec<Test>,
}

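/// If a test is marked `doubleEscaped`, unescape its input and the data of
/// any `Character` and `Comment` tokens so downstream code never has to care
/// about the flag.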
fn undo_double_escaping(mut test: Test) -> Test {
    if test.double_escaped {
        test.input = unescape(&test.input);

        test.output.tokens = test
            .output
            .tokens
            .into_iter()
            .map(|token| match token {
                Token::Character(x) => Token::Character(unescape(&x)),
                Token::Comment(x) => Token::Comment(unescape(&x)),
                token => token,
            })
            .collect();
    }

    test
}

/// Implements the `\uXXXX` escape sequences described in the tokenizer tests
/// of html5lib-tests (and nothing more).
fn unescape(data: &str) -> String {
    let mut stream = data.chars();
    let mut rv = String::new();

    loop {
        // Copy characters through verbatim until the next backslash.
        match stream.next() {
            Some('\\') => (),
            Some(x) => {
                rv.push(x);
                continue;
            }
            None => break,
        }

        // The only escape the tests use is `\uXXXX`.
        match stream.next() {
            Some('u') => (),
            x => panic!("unexpected escape: {:?}", x),
        }

        // Push the four hex digits onto `rv`, then swap them out for the
        // character they encode.
        let orig_len = rv.len();

        for _ in 0..4 {
            rv.push(match stream.next() {
                Some(x) => x,
                None => panic!("unexpected eof after \\u"),
            });
        }

        let c = u32::from_str_radix(&rv[orig_len..], 16).expect("failed to parse as hex");
        let c = char::from_u32(c).expect("bad character");
        rv.truncate(orig_len);
        rv.push(c);
    }

    rv
}