Diffstat (limited to 'integration_tests/tests/test_html5lib.rs')
-rw-r--r--  integration_tests/tests/test_html5lib.rs  |  218
1 file changed, 0 insertions(+), 218 deletions(-)
diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs
deleted file mode 100644
index 2d3e4cb..0000000
--- a/integration_tests/tests/test_html5lib.rs
+++ /dev/null
@@ -1,218 +0,0 @@
-use std::{fs::File, io::BufReader, ops::Range, path::Path};
-
-use html5lib_tests::{
-    parse_tests, Error as TestError, InitialState, Output, Test, Token as TestToken,
-};
-use html5tokenizer::{
-    offset::{Offset, PosTrackingReader, Position},
-    reader::Reader,
-    BasicEmitter, Emitter, Error, Event, InternalState, Token, Tokenizer, TracingEmitter,
-};
-use similar_asserts::assert_eq;
-
-/// Path to a local checkout of [html5lib-tests], relative to the
-/// directory containing the `Cargo.toml` file of the current crate.
-///
-/// [html5lib-tests]: https://github.com/html5lib/html5lib-tests
-const HTML5LIB_TESTS_PATH: &str = "html5lib-tests";
-
-// FUTURE: it would be nice to assert that HTML5LIB_TESTS_PATH matches the path defined in .gitmodules
-// but this is currently blocked by:
-// * Cargo not setting CARGO_WORKSPACE_DIR (see https://github.com/rust-lang/cargo/issues/3946)
-// * gix-config having more dependencies than I'd want to add for this
-
-#[test]
-fn tokenizer() {
-    // TODO: use a custom test harness with e.g. libtest-mimic
-    let test_dir = format!("{HTML5LIB_TESTS_PATH}/tokenizer");
-
-    let mut test_paths = glob::glob(&format!("{test_dir}/*.test"))
-        .unwrap()
-        .peekable();
-
-    if test_paths.peek().is_none() {
-        panic!(
-            "could not find any .test files in {}, maybe try `git submodule update --init`",
-            test_dir
-        );
-    }
-
-    for test_path in test_paths {
-        let test_path = test_path.unwrap();
-
-        test_tokenizer_file(&test_path);
-    }
-}
-
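-/// Runs all tokenizer tests from a single html5lib-tests `.test` file.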
-fn test_tokenizer_file(path: &Path) {
-    let fname = path.file_name().unwrap().to_str().unwrap();
-
-    if matches!(
-        fname,
-        // We don't implement the "Coercing an HTML DOM into an infoset" section.
-        "xmlViolation.test" |
-        // Our parser does not operate on bytes; these test inputs aren't valid Rust &str.
-        "unicodeCharsProblematic.test"
-    ) {
-        return;
-    }
-
-    let f = File::open(path).unwrap();
-    let bf = BufReader::new(f);
-    let tests = parse_tests(bf).expect(&format!("failed to parse {path:?}"));
-
-    for (i, test) in tests.into_iter().enumerate() {
-        run_test(fname, i, test);
-    }
-}
-
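-/// Runs a single test under every initial state it requests, once for each
-/// supported reader/emitter combination.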
-fn run_test(fname: &str, test_i: usize, test: Test) {
-    for state in &test.initial_states {
-        run_test_inner(
-            fname,
-            test_i,
-            &test,
-            state,
-            Tokenizer::new(&test.input, BasicEmitter::default()),
-            "BasicEmitter string",
-        );
-
-        run_test_inner(
-            fname,
-            test_i,
-            &test,
-            state,
-            Tokenizer::new(
-                BufReader::new(test.input.as_bytes()),
-                BasicEmitter::default(),
-            ),
-            "BasicEmitter bufread",
-        );
-
-        run_test_inner(
-            fname,
-            test_i,
-            &test,
-            state,
-            Tokenizer::new(
-                PosTrackingReader::new(&test.input),
-                TracingEmitter::default(),
-            ),
-            "TracingEmitter string",
-        );
-
-        run_test_inner(
-            fname,
-            test_i,
-            &test,
-            state,
-            Tokenizer::new(
-                PosTrackingReader::new(BufReader::new(test.input.as_bytes())),
-                TracingEmitter::default(),
-            ),
-            "TracingEmitter bufread",
-        );
-    }
-}
-
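-/// Tokenizes the test input with the given tokenizer and asserts that the
-/// emitted tokens and errors match the test's expected output.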
-fn run_test_inner<R, O, E, T>(
-    fname: &str,
-    test_i: usize,
-    test: &Test,
-    state: &InitialState,
-    mut tokenizer: Tokenizer<R, O, E>,
-    tokenizer_info: &str,
-) where
-    R: Reader + Position<O>,
-    O: Offset,
-    E: Emitter<O> + Iterator<Item = T> + DrainErrors<O>,
-    T: Into<Token>,
-{
-    println!(
-        "==== FILE {}, TEST {}, STATE {:?}, TOKENIZER {} ====",
-        fname, test_i, state, tokenizer_info,
-    );
-    println!("description: {}", test.description);
-    tokenizer.set_internal_state(match state {
-        InitialState::Data => InternalState::Data,
-        InitialState::Plaintext => InternalState::Plaintext,
-        InitialState::Rcdata => InternalState::Rcdata,
-        InitialState::Rawtext => InternalState::Rawtext,
-        InitialState::ScriptData => InternalState::ScriptData,
-        InitialState::CdataSection => InternalState::CdataSection,
-    });
-    if let Some(last_start_tag) = &test.last_start_tag {
-        tokenizer.set_last_start_tag(last_start_tag);
-    }
-
-    let mut actual_tokens = Vec::new();
-
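-    // Drive the tokenizer by hand: a CdataOpen event asks the caller to
-    // decide (via handle_cdata_open) how `<![CDATA[` should be tokenized.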
-    while let Some(event) = tokenizer.next() {
-        let token = match event.unwrap() {
-            Event::CdataOpen => {
-                // The test inputs never contain foreign content,
-                // the only place where CDATA sections are allowed.
-                tokenizer.handle_cdata_open(false);
-                continue;
-            }
-            Event::Token(token) => token.into(),
-        };
-
-        match token {
-            Token::StartTag(tag) => actual_tokens.push(TestToken::StartTag {
-                name: tag.name,
-                attributes: tag
-                    .attributes
-                    .into_iter()
-                    .map(|attr| (attr.name, attr.value))
-                    .collect(),
-                self_closing: tag.self_closing,
-            }),
-            Token::EndTag(tag) => actual_tokens.push(TestToken::EndTag { name: tag.name }),
-            Token::Char(c) => {
-                // Coalesce all adjacent character tokens into a single string.
-                if let Some(TestToken::Character(s)) = actual_tokens.last_mut() {
-                    s.push(c);
-                } else {
-                    actual_tokens.push(TestToken::Character(c.into()));
-                }
-            }
-            Token::Comment(comment) => actual_tokens.push(TestToken::Comment(comment)),
-            Token::Doctype(doctype) => actual_tokens.push(TestToken::Doctype {
-                name: doctype.name,
-                public_id: doctype.public_id,
-                system_id: doctype.system_id,
-                force_quirks: doctype.force_quirks,
-            }),
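-            // The expected outputs of html5lib-tests don't list an EOF token.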
-            Token::EndOfFile => {}
-        };
-    }
-
-    assert_eq!(
-        Output {
-            errors: tokenizer
-                .emitter_mut()
-                .drain_errors()
-                .map(|(e, _)| TestError {
-                    code: e.code().to_string()
-                })
-                .collect(),
-            tokens: actual_tokens,
-        },
-        test.output,
-    );
-}
-
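-/// Unifies the inherent `drain_errors` methods of [`BasicEmitter`] and
-/// [`TracingEmitter`] under one interface so `run_test_inner` can stay
-/// generic over the emitter.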
-trait DrainErrors<O> {
-    fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_>;
-}
-
-impl<O> DrainErrors<O> for BasicEmitter<O> {
-    fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_> {
-        Box::new(self.drain_errors())
-    }
-}
-
-impl DrainErrors<usize> for TracingEmitter {
-    fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<usize>)> + '_> {
-        Box::new(self.drain_errors())
-    }
-}