use std::{fs::File, io::BufReader, ops::Range, path::Path};

use html5lib_tests::{
    parse_tests, Error as TestError, InitialState, Output, Test, Token as TestToken,
};
use html5tokenizer::{
    offset::{Offset, PosTrackingReader, Position},
    reader::Reader,
    BasicEmitter, Emitter, Error, Event, InternalState, Token, Tokenizer, TracingEmitter,
};
use similar_asserts::assert_eq;

/// Path to a local checkout of [html5lib-tests], relative to the
/// directory containing the `Cargo.toml` file of the current crate.
///
/// [html5lib-tests]: https://github.com/html5lib/html5lib-tests
const HTML5LIB_TESTS_PATH: &str = "html5lib-tests";

// FUTURE: it would be nice to assert that HTML5LIB_TESTS_PATH matches the path defined in .gitmodules
// but this is currently blocked by:
// * Cargo not setting CARGO_WORKSPACE_DIR (see https://github.com/rust-lang/cargo/issues/3946)
// * gix-config having more dependencies than I'd want to add for this

#[test]
fn tokenizer() {
    // TODO: use a custom test harness with e.g. libtest-mimic
    let test_dir = format!("{HTML5LIB_TESTS_PATH}/tokenizer");

    let mut test_paths = glob::glob(&format!("{test_dir}/*.test"))
        .unwrap()
        .peekable();

    if test_paths.peek().is_none() {
        panic!(
            "could not find any .test files in {test_dir}, maybe try `git submodule update --init`"
        );
    }

    for test_path in test_paths {
        let test_path = test_path.unwrap();

        test_tokenizer_file(&test_path);
    }
}

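/// Runs all tests contained in the given `.test` file.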
fn test_tokenizer_file(path: &Path) {
    let fname = path.file_name().unwrap().to_str().unwrap();

    if matches!(
        fname,
        // We don't implement the "Coercing an HTML DOM into an infoset" section of the spec.
        "xmlViolation.test" |
        // Our tokenizer doesn't operate on raw bytes, and the inputs in this file
        // aren't valid UTF-8, so they cannot be represented as a Rust &str.
        "unicodeCharsProblematic.test"
    ) {
        return;
    }

    let f = File::open(path).unwrap();
    let bf = BufReader::new(f);
    let tests =
        parse_tests(bf).unwrap_or_else(|err| panic!("failed to parse {path:?}: {err:?}"));

    for (i, test) in tests.into_iter().enumerate() {
        run_test(fname, i, test);
    }
}

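/// Runs the given test against all four tokenizer configurations
/// (`BasicEmitter`/`TracingEmitter`, each reading from a string and
/// from a `BufRead`), once per initial state.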
fn run_test(fname: &str, test_i: usize, test: Test) {
    for state in &test.initial_states {
        run_test_inner(
            fname,
            test_i,
            &test,
            state,
            Tokenizer::new(&test.input, BasicEmitter::default()),
            "BasicEmitter string",
        );

        run_test_inner(
            fname,
            test_i,
            &test,
            state,
            Tokenizer::new(
                BufReader::new(test.input.as_bytes()),
                BasicEmitter::default(),
            ),
            "BasicEmitter bufread",
        );

        run_test_inner(
            fname,
            test_i,
            &test,
            state,
            Tokenizer::new(
                PosTrackingReader::new(&test.input),
                TracingEmitter::default(),
            ),
            "TracingEmitter string",
        );

        run_test_inner(
            fname,
            test_i,
            &test,
            state,
            Tokenizer::new(
                PosTrackingReader::new(BufReader::new(test.input.as_bytes())),
                TracingEmitter::default(),
            ),
            "TracingEmitter bufread",
        );
    }
}

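/// Runs the tokenizer over the test input in the given initial state and
/// asserts that the emitted tokens and errors match the expected output.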
fn run_test_inner<R, O, E, T>(
    fname: &str,
    test_i: usize,
    test: &Test,
    state: &InitialState,
    mut tokenizer: Tokenizer<R, O, E>,
    tokenizer_info: &str,
) where
    R: Reader + Position<O>,
    O: Offset,
    E: Emitter<O> + Iterator<Item = T> + DrainErrors<O>,
    T: Into<Token>,
{
    println!(
        "==== FILE {fname}, TEST {test_i}, STATE {state:?}, TOKENIZER {tokenizer_info} ===="
    );
    println!("description: {}", test.description);
    tokenizer.set_internal_state(match state {
        InitialState::Data => InternalState::Data,
        InitialState::Plaintext => InternalState::Plaintext,
        InitialState::Rcdata => InternalState::Rcdata,
        InitialState::Rawtext => InternalState::Rawtext,
        InitialState::ScriptData => InternalState::ScriptData,
        InitialState::CdataSection => InternalState::CdataSection,
    });
    if let Some(last_start_tag) = &test.last_start_tag {
        tokenizer.set_last_start_tag(last_start_tag);
    }

    let mut actual_tokens = Vec::new();

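    // We can't use a `for` loop here since the loop body needs
    // `&mut tokenizer` to answer `CdataOpen` events.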
    while let Some(event) = tokenizer.next() {
        let token = match event.unwrap() {
            Event::CdataOpen => {
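                // None of these tests tokenize foreign content,
                // so `<![CDATA[` is to be treated as a bogus comment.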
                tokenizer.handle_cdata_open(false);
                continue;
            }
            Event::Token(token) => token.into(),
        };

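        // Convert the token into the representation used by the html5lib tests.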
        match token {
            Token::StartTag(tag) => actual_tokens.push(TestToken::StartTag {
                name: tag.name,
                attributes: tag
                    .attributes
                    .into_iter()
                    .map(|attr| (attr.name, attr.value))
                    .collect(),
                self_closing: tag.self_closing,
            }),
            Token::EndTag(tag) => actual_tokens.push(TestToken::EndTag { name: tag.name }),
            Token::Char(c) => {
                // Coalesce all adjacent character tokens into a single string.
                if let Some(TestToken::Character(s)) = actual_tokens.last_mut() {
                    s.push(c);
                } else {
                    actual_tokens.push(TestToken::Character(c.into()));
                }
            }
            Token::Comment(comment) => actual_tokens.push(TestToken::Comment(comment)),
            Token::Doctype(doctype) => actual_tokens.push(TestToken::Doctype {
                name: doctype.name,
                public_id: doctype.public_id,
                system_id: doctype.system_id,
                force_quirks: doctype.force_quirks,
            }),
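            // The expected output of the tests doesn't include end-of-file tokens.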
            Token::EndOfFile => {}
        };
    }

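    // similar_asserts::assert_eq prints a labeled diff of the two outputs on failure.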
    assert_eq!(
        Output {
            errors: tokenizer
                .emitter_mut()
                .drain_errors()
                .map(|(e, _)| TestError {
                    code: e.code().to_string()
                })
                .collect(),
            tokens: actual_tokens,
        },
        test.output,
    );
}

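/// Unifies the inherent `drain_errors` methods of `BasicEmitter` and
/// `TracingEmitter` so that `run_test_inner` can be generic over the emitter.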
trait DrainErrors<O> {
    fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_>;
}

impl<O> DrainErrors<O> for BasicEmitter<O> {
    fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_> {
        Box::new(self.drain_errors())
    }
}

impl DrainErrors<usize> for TracingEmitter {
    fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<usize>)> + '_> {
        Box::new(self.drain_errors())
    }
}