use std::ops::Range;

use codespan_reporting::{
    self,
    diagnostic::{Diagnostic, Label},
    files::SimpleFiles,
    term::{self, termcolor::Buffer},
};
use html5tokenizer::{offset::PosTrackingReader, NaiveParser, Token};
use insta::assert_snapshot;
use similar_asserts::assert_eq;

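/// Tokenizes `html` with byte-offset position tracking, flattening away the
/// `Result` wrapper around each token (reading from a string is infallible,
/// so no tokens are lost).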
fn tokenizer(html: &'static str) -> impl Iterator<Item = Token<usize>> {
    NaiveParser::new(PosTrackingReader::new(html)).flatten()
}

/// A convenience type alias for labeler closures that call `tokens.next()`,
/// since Rust apparently cannot infer the closure parameter type
/// (an explicit annotation is required).
type TokenIter = Box<dyn Iterator<Item = Token<usize>>>;

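/// Runs `labeler` over the tokens of `html`, asserts that the resulting
/// spans are character-encoding independent and returns the annotated source.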
fn test_and_annotate<S: AsRef<str> + Clone>(
    html: &'static str,
    labeler: impl Fn(TokenIter) -> Vec<(Range<usize>, S)>,
) -> String {
    let labels = labeler(Box::new(tokenizer(html)));

    assert_char_encoding_independence(html, labeler);

    annotate(html, labels)
}

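/// Renders `html` with the given spans underlined (and optionally labeled)
/// beneath the corresponding source lines.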
fn annotate(html: &str, labels: Vec<(Range<usize>, impl AsRef<str>)>) -> String {
    let mut files = SimpleFiles::new();
    let file_id = files.add("test.html", html);

    let diagnostic = Diagnostic::note().with_labels(
        labels
            .into_iter()
            .map(|(span, text)| Label::primary(file_id, span).with_message(text.as_ref()))
            .collect(),
    );

    let mut writer = Buffer::no_color();
    let config = codespan_reporting::term::Config::default();
    term::emit(&mut writer, &config, &files, &diagnostic).unwrap();
    let msg = std::str::from_utf8(writer.as_slice()).unwrap();

    // strip the filename and the line numbers since we don't need them
    // (apparently they cannot be disabled in codespan_reporting)
    msg.lines()
        .skip(3)
        .flat_map(|l| l.split_once("│ ").map(|s| format!("{}\n", s.1.trim_end())))
        .collect::<Vec<_>>()
        .join("")
}

#[test]
fn start_tag_span() {
    let html = "<x> <xyz> <xyz  > <xyz/>";
    let labeler = |tokens| {
        let mut labels = Vec::new();
        for token in tokens {
            if let Token::StartTag(tag) = token {
                labels.push((tag.span, ""));
            }
        }
        labels
    };
    assert_snapshot!(test_and_annotate(html, labeler), @r###"
    <x> <xyz> <xyz  > <xyz/>
    ^^^ ^^^^^ ^^^^^^^ ^^^^^^
    "###);
}

#[test]
fn end_tag_span() {
    let html = "</x> </xyz> </xyz  > </xyz/>";
    let labeler = |tokens| {
        let mut labels = Vec::new();
        for token in tokens {
            if let Token::EndTag(tag) = token {
                labels.push((tag.span, ""));
            }
        }
        labels
    };
    assert_snapshot!(test_and_annotate(html, labeler), @r###"
    </x> </xyz> </xyz  > </xyz/>
    ^^^^ ^^^^^^ ^^^^^^^^ ^^^^^^^
    "###);
}

#[test]
fn start_tag_name_span() {
    let html = "<x> <xyz> <xyz  > <xyz/>";
    let labeler = |tokens| {
        let mut labels = Vec::new();
        for token in tokens {
            if let Token::StartTag(tag) = token {
                labels.push((tag.name_span, ""));
            }
        }
        labels
    };
    assert_snapshot!(test_and_annotate(html, labeler), @r###"
    <x> <xyz> <xyz  > <xyz/>
     ^   ^^^   ^^^     ^^^
    "###);
}

#[test]
fn end_tag_name_span() {
    let html = "</x> </xyz> </xyz  > </xyz/>";
    let labeler = |tokens| {
        let mut labels = Vec::new();
        for token in tokens {
            if let Token::EndTag(tag) = token {
                labels.push((tag.name_span, ""));
            }
        }
        labels
    };
    assert_snapshot!(test_and_annotate(html, labeler), @r###"
    </x> </xyz> </xyz  > </xyz/>
      ^    ^^^    ^^^      ^^^
    "###);
}

#[test]
fn attribute_name_span() {
    let html = "<test x xyz y=VAL xy=VAL z = VAL yzx = VAL>";
    let labeler = |mut tokens: TokenIter| {
        let mut labels = Vec::new();
        let Token::StartTag(tag) = tokens.next().unwrap() else {
            panic!("expected start tag")
        };
        for attr in &tag.attributes {
            labels.push((attr.name_span(), ""));
        }
        labels
    };
    assert_snapshot!(test_and_annotate(html, labeler), @r###"
    <test x xyz y=VAL xy=VAL z = VAL yzx = VAL>
          ^ ^^^ ^     ^^     ^       ^^^
    "###);
}

#[test]
fn attribute_value_span() {
    let html = "<test x=unquoted y = unquoted z='single-quoted' zz=\"double-quoted\" empty=''>";
    let labeler = |mut tokens: TokenIter| {
        let mut labels = Vec::new();
        let Token::StartTag(tag) = tokens.next().unwrap() else {
            panic!("expected start tag")
        };
        for attr in &tag.attributes {
            labels.push((attr.value_span().unwrap(), ""));
        }
        labels
    };
    assert_snapshot!(test_and_annotate(html, labeler), @r###"
    <test x=unquoted y = unquoted z='single-quoted' zz="double-quoted" empty=''>
            ^^^^^^^^     ^^^^^^^^    ^^^^^^^^^^^^^      ^^^^^^^^^^^^^         ^
    "###);
}

#[test]
fn attribute_value_with_char_ref() {
    let html = "<test x=&amp; y='&amp;' z=\"&amp;\">";
    let labeler = |mut tokens: TokenIter| {
        let mut labels = Vec::new();
        let Token::StartTag(tag) = tokens.next().unwrap() else {
            panic!("expected start tag")
        };
        for attr in &tag.attributes {
            labels.push((attr.value_span().unwrap(), ""));
        }
        labels
    };
    assert_snapshot!(test_and_annotate(html, labeler), @r###"
    <test x=&amp; y='&amp;' z="&amp;">
            ^^^^^    ^^^^^     ^^^^^
    "###);
}

#[test]
fn comment_data_span() {
    #[rustfmt::skip]
    let cases = [
        "<!-- Why are you looking at the source code? -->",
        "<!-- Why are you looking at the source code? --",
        "<!-- Why are you looking at the source code? -",
        "<!-- Why are you looking at the source code?",
        "<!--",
        "<!-->",
        "<!---",
        "<!--->",
        "<!-- Why are you looking at the source code? ->",
        "<!-- Why are you looking at the source code? --!>",
        "<!-- Why are you looking at the source code? --!",

        // bogus comments
        "<! Why are you looking at the source code? -->",
        "<!",
    ];

    let mut annotated = String::new();
    for case in cases {
        let labeler = |mut tokens: TokenIter| {
            let Token::Comment(comment) = tokens
                .find(|t| !matches!(t, Token::Error { .. }))
                .unwrap()
            else {
                panic!("expected comment");
            };
            vec![(comment.data_span(), "")]
        };

        annotated.push_str(&test_and_annotate(case, labeler));
    }

    assert_snapshot!(annotated, @r###"
    <!-- Why are you looking at the source code? -->
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    <!-- Why are you looking at the source code? --
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    <!-- Why are you looking at the source code? -
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    <!-- Why are you looking at the source code?
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    <!--
        ^
    <!-->
        ^
    <!---
        ^
    <!--->
        ^
    <!-- Why are you looking at the source code? ->
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    <!-- Why are you looking at the source code? --!>
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    <!-- Why are you looking at the source code? --!
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    <! Why are you looking at the source code? -->
      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    <!
      ^
    "###);

    for (idx, case) in cases.iter().enumerate() {
        let Token::Comment(comment) = tokenizer(case)
            .find(|t| !matches!(t, Token::Error { .. }))
            .unwrap()
        else {
            panic!("expected comment");
        };
        assert_eq!(case[comment.data_span()], comment.data, "case {idx}");
    }
}

#[test]
fn doctype_span() {
    #[rustfmt::skip]
    let cases = [
        r#"<!DOCTYPE       HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"     >"#,
    ];

    let mut annotated = String::new();
    for case in cases {
        let labeler = |mut tokens: TokenIter| {
            let Token::Doctype(doctype) = tokens
                .find(|t| !matches!(t, Token::Error { .. }))
                .unwrap()
            else {
                panic!("expected doctype");
            };
            vec![(doctype.span, "")]
        };
        annotated.push_str(&test_and_annotate(case, labeler));
    }

    assert_snapshot!(annotated, @r###"
    <!DOCTYPE       HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"     >
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    "###);
}

#[test]
fn doctype_id_spans() {
    #[rustfmt::skip]
    let cases = [
        r#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">"#,
    ];

    let mut annotated = String::new();
    for case in cases {
        let labeler = |mut tokens: TokenIter| {
            let Token::Doctype(doctype) = tokens
                .find(|t| !matches!(t, Token::Error { .. }))
                .unwrap()
            else {
                panic!("expected doctype");
            };

            let mut labels = Vec::new();
            if let Some(public_id_span) = doctype.public_id_span() {
                labels.push((public_id_span, "public id"));
            }
            if let Some(system_id_span) = doctype.system_id_span() {
                labels.push((system_id_span, "system id"));
            }
            labels
        };

        annotated.push_str(&test_and_annotate(case, labeler));
    }

    assert_snapshot!(annotated, @r###"
    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
                           ^^^^^^^^^^^^^^^^^^^^^^^^^   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ system id
                           │
                           public id
    "###);
}

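/// Checks that every error span is well-formed and returns the source
/// annotated with the corresponding error codes.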
fn annotate_errors(html: &'static str) -> String {
    for token in tokenizer(html) {
        let Token::Error { span, .. } = token else {
            continue;
        };
        if span.start == span.end {
            if span.start != html.len() {
                panic!("empty error spans are only allowed at the very end of the source (for eof errors)");
            }
        } else {
            assert!(span.start < span.end);
            assert!(span.end <= html.len());
        }
    }

    let labeler = |tokens| {
        let mut labels = Vec::new();
        for token in tokens {
            let Token::Error { error, span } = token else {
                continue;
            };

            labels.push((span, error.code()));
        }
        labels
    };

    test_and_annotate(html, labeler)
}

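/// Asserts that the `error_*` tests in this file appear in alphabetical order.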
#[test]
fn tests_for_errors_are_sorted() {
    let source_of_this_file = std::fs::read_to_string(file!()).unwrap();
    let mut error_tests: Vec<_> = source_of_this_file
        .lines()
        .filter(|l| l.starts_with("fn error_"))
        .collect();
    let error_tests_found_order = error_tests.join("\n");
    error_tests.sort();
    let error_tests_sorted = error_tests.join("\n");
    assert_eq!(error_tests_found_order, error_tests_sorted);
}

#[test]
fn error_char_ref_absence_of_digits() {
    let html = "&#qux;";
    assert_snapshot!(annotate_errors(html), @r###"
    &#qux;
    ^^^ absence-of-digits-in-numeric-character-reference
    "###);
}

#[test]
fn error_char_ref_control_char() {
    let html = "&#127;";
    assert_snapshot!(annotate_errors(html), @r###"
    &#127;
    ^^^^^^ control-character-reference
    "###);
}

#[test]
fn error_char_ref_missing_semicolon() {
    let html = "&not";
    assert_snapshot!(annotate_errors(html), @r###"
    &not
        ^ missing-semicolon-after-character-reference
    "###);
}

#[test]
fn error_char_ref_noncharacter() {
    let html = "&#xFDD0;";
    assert_snapshot!(annotate_errors(html), @r###"
    &#xFDD0;
    ^^^^^^^^ noncharacter-character-reference
    "###);
}

#[test]
fn error_char_ref_null_char() {
    let html = "&#0;";
    assert_snapshot!(annotate_errors(html), @r###"
    &#0;
    ^^^^ null-character-reference
    "###);
}

#[test]
fn error_char_ref_outside_unicode_range() {
    let html = "&#9999999;";
    assert_snapshot!(annotate_errors(html), @r###"
    &#9999999;
    ^^^^^^^^^^ character-reference-outside-unicode-range
    "###);
}

#[test]
fn error_char_ref_surrogate() {
    let html = "&#xD800;";
    assert_snapshot!(annotate_errors(html), @r###"
    &#xD800;
    ^^^^^^^^ surrogate-character-reference
    "###);
}

#[test]
fn error_char_ref_unknown_named() {
    let html = "The pirate says &arrrrr;";
    assert_snapshot!(annotate_errors(html), @r###"
    The pirate says &arrrrr;
                    ^^^^^^^^ unknown-named-character-reference
    "###);
}

#[test]
fn error_duplicate_attribute() {
    let html = "Does this open two pages? <a href=foo.html href=bar.html>";
    assert_snapshot!(annotate_errors(html), @r###"
    Does this open two pages? <a href=foo.html href=bar.html>
                                               ^^^^ duplicate-attribute
    "###);
}

#[test]
fn error_end_tag_with_attributes() {
    let html = "</end-tag first second=value>";
    assert_snapshot!(annotate_errors(html), @r###"
    </end-tag first second=value>
                    ^^^^^^ end-tag-with-attributes
    "###);
}

#[test]
fn error_end_tag_with_trailing_solidus() {
    let html = "Do you start or do you end? </yes/>";
    assert_snapshot!(annotate_errors(html), @r###"
    Do you start or do you end? </yes/>
                                     ^ end-tag-with-trailing-solidus
    "###);
}

#[test]
fn error_eof_before_tag_name() {
    let html = "<";
    assert_snapshot!(annotate_errors(html), @r###"
    <
     ^ eof-before-tag-name
    "###);
}

// TODO: add an error_eof_in_cdata test
// (blocked by the lack of a proper tree constructor; the NaiveParser doesn't parse CDATA sections)

#[test]
fn error_eof_in_comment() {
    let html = "<!--";
    assert_snapshot!(annotate_errors(html), @r###"
    <!--
        ^ eof-in-comment
    "###);
}

#[test]
fn error_eof_in_doctype() {
    let html = "<!doctype html";
    assert_snapshot!(annotate_errors(html), @r###"
    <!doctype html
                  ^ eof-in-doctype
    "###);
}

#[test]
fn error_eof_in_script_html_comment_like_text() {
    let html = "<script><!--";
    assert_snapshot!(annotate_errors(html), @r###"
    <script><!--
                ^ eof-in-script-html-comment-like-text
    "###);
}

#[test]
fn error_eof_in_tag() {
    let html = "</sarcasm";
    assert_snapshot!(annotate_errors(html), @r###"
    </sarcasm
             ^ eof-in-tag
    "###);
}

#[test]
fn error_invalid_first_character_of_tag_name() {
    let html = "Please mind the gap: < test";
    assert_snapshot!(annotate_errors(html), @r###"
    Please mind the gap: < test
                          ^ invalid-first-character-of-tag-name
    "###);
}

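/// Asserts that the spans produced by `labeler` are character-encoding
/// independent by re-running it with positions measured in UTF-16 bytes
/// and comparing those spans against the UTF-8 byte spans.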
fn assert_char_encoding_independence<S: AsRef<str> + Clone>(
    html: &'static str,
    labeler: impl Fn(TokenIter) -> Vec<(Range<usize>, S)>,
) {
    let utf8_tokens = tokenizer(html);
    let string_reader = html5tokenizer::reader::IntoReader::into_reader(html);
    let utf16_tokens =
        NaiveParser::new(PosTrackingReader::new(Utf16Reader(string_reader))).flatten();
    let utf8_labels = labeler(Box::new(utf8_tokens));

    for (idx, (span, _)) in labeler(Box::new(utf16_tokens)).into_iter().enumerate() {
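        // Translate the expected UTF-8 byte offsets into UTF-16 byte offsets
        // by counting the UTF-16 code units of the preceding source text.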
        let expected_utf16_span = html[..utf8_labels[idx].0.start].encode_utf16().count() * 2
            ..html[..utf8_labels[idx].0.end].encode_utf16().count() * 2;
        assert_eq!(
            span,
            expected_utf16_span,
            "UTF-16 span didn't match the UTF-8 span, which looks like:\n{}",
            annotate(html, vec![utf8_labels[idx].clone()])
        );
    }
}

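/// A reader wrapper that makes the position tracker advance by the UTF-16
/// byte length of each character instead of its UTF-8 byte length.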
struct Utf16Reader<'a>(html5tokenizer::reader::StringReader<'a>);

impl html5tokenizer::reader::Reader for Utf16Reader<'_> {
    type Error = std::convert::Infallible;

    fn read_char(&mut self) -> Result<Option<char>, Self::Error> {
        self.0.read_char()
    }

    fn try_read_string(&mut self, s: &str, case_sensitive: bool) -> Result<bool, Self::Error> {
        self.0.try_read_string(s, case_sensitive)
    }

    fn len_of_char_in_current_encoding(&self, c: char) -> usize {
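        // Each UTF-16 code unit occupies two bytes.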
        c.len_utf16() * 2
    }
}