diff options
author | Martin Fischer <martin@push-f.com> | 2023-09-27 09:25:12 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-09-28 11:00:01 +0200 |
commit | b027ecdb397c2e378491f847660f8eeb740e8cf6 (patch) | |
tree | 1f910e8974c1f37706b3ab78d4214977b36fe74a /integration_tests/tests/test_html5lib.rs | |
parent | 635a571ee76bf7fdaaf01c204f30289489b80c1a (diff) |
chore: rename integration tests
Diffstat (limited to 'integration_tests/tests/test_html5lib.rs')
-rw-r--r-- | integration_tests/tests/test_html5lib.rs | 218 |
1 file changed, 0 insertions, 218 deletions
diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs deleted file mode 100644 index 2d3e4cb..0000000 --- a/integration_tests/tests/test_html5lib.rs +++ /dev/null @@ -1,218 +0,0 @@ -use std::{fs::File, io::BufReader, ops::Range, path::Path}; - -use html5lib_tests::{ - parse_tests, Error as TestError, InitialState, Output, Test, Token as TestToken, -}; -use html5tokenizer::{ - offset::{Offset, PosTrackingReader, Position}, - reader::Reader, - BasicEmitter, Emitter, Error, Event, InternalState, Token, Tokenizer, TracingEmitter, -}; -use similar_asserts::assert_eq; - -/// Path to a local checkout of [html5lib-tests], relative to the -/// directory containing the `Cargo.toml` file of the current crate. -/// -/// [html5lib-tests]: https://github.com/html5lib/html5lib-tests -const HTML5LIB_TESTS_PATH: &str = "html5lib-tests"; - -// FUTURE: it would be nice to assert that HTML5LIB_TESTS_PATH matches the path defined in .gitmodules -// but this is currently blocked by: -// * Cargo not setting CARGO_WORKSPACE_DIR (see https://github.com/rust-lang/cargo/issues/3946) -// * gix-config having more dependencies than I'd want to add for this - -#[test] -fn tokenizer() { - // TODO: use a custom test harness with e.g. 
libtest-mimic - let test_dir = format!("{HTML5LIB_TESTS_PATH}/tokenizer"); - - let mut test_paths = glob::glob(&format!("{test_dir}/*.test")) - .unwrap() - .peekable(); - - if test_paths.peek().is_none() { - panic!( - "could not find any .test files in {}, maybe try `git submodule update --init`", - test_dir - ); - } - - for test_path in test_paths { - let test_path = test_path.unwrap(); - - test_tokenizer_file(&test_path); - } -} - -fn test_tokenizer_file(path: &Path) { - let fname = path.file_name().unwrap().to_str().unwrap(); - - if matches!( - fname, - // We don't implement "Coercing an HTML DOM into an infoset" section - "xmlViolation.test" | - // Our parser does not operate on bytes, the input isn't valid Rust &str - "unicodeCharsProblematic.test" - ) { - return; - } - - let f = File::open(path).unwrap(); - let bf = BufReader::new(f); - let tests = parse_tests(bf).expect(&format!("failed to parse {path:?}")); - - for (i, test) in tests.into_iter().enumerate() { - run_test(fname, i, test); - } -} - -fn run_test(fname: &str, test_i: usize, test: Test) { - for state in &test.initial_states { - run_test_inner( - fname, - test_i, - &test, - state, - Tokenizer::new(&test.input, BasicEmitter::default()), - "BasicEmitter string", - ); - - run_test_inner( - fname, - test_i, - &test, - state, - Tokenizer::new( - BufReader::new(test.input.as_bytes()), - BasicEmitter::default(), - ), - "BasicEmitter bufread", - ); - - run_test_inner( - fname, - test_i, - &test, - state, - Tokenizer::new( - PosTrackingReader::new(&test.input), - TracingEmitter::default(), - ), - "TracingEmitter string", - ); - - run_test_inner( - fname, - test_i, - &test, - state, - Tokenizer::new( - PosTrackingReader::new(BufReader::new(test.input.as_bytes())), - TracingEmitter::default(), - ), - "TracingEmitter bufread", - ); - } -} - -fn run_test_inner<R, O, E, T>( - fname: &str, - test_i: usize, - test: &Test, - state: &InitialState, - mut tokenizer: Tokenizer<R, O, E>, - tokenizer_info: &str, -) 
where - R: Reader + Position<O>, - O: Offset, - E: Emitter<O> + Iterator<Item = T> + DrainErrors<O>, - T: Into<Token>, -{ - println!( - "==== FILE {}, TEST {}, STATE {:?}, TOKENIZER {} ====", - fname, test_i, state, tokenizer_info, - ); - println!("description: {}", test.description); - tokenizer.set_internal_state(match state { - InitialState::Data => InternalState::Data, - InitialState::Plaintext => InternalState::Plaintext, - InitialState::Rcdata => InternalState::Rcdata, - InitialState::Rawtext => InternalState::Rawtext, - InitialState::ScriptData => InternalState::ScriptData, - InitialState::CdataSection => InternalState::CdataSection, - }); - if let Some(last_start_tag) = &test.last_start_tag { - tokenizer.set_last_start_tag(last_start_tag); - } - - let mut actual_tokens = Vec::new(); - - while let Some(event) = tokenizer.next() { - let token = match event.unwrap() { - Event::CdataOpen => { - tokenizer.handle_cdata_open(false); - continue; - } - Event::Token(token) => token.into(), - }; - - match token { - Token::StartTag(tag) => actual_tokens.push(TestToken::StartTag { - name: tag.name, - attributes: tag - .attributes - .into_iter() - .map(|attr| (attr.name, attr.value)) - .collect(), - self_closing: tag.self_closing, - }), - Token::EndTag(tag) => actual_tokens.push(TestToken::EndTag { name: tag.name }), - Token::Char(c) => { - // Coalesce all adjacent character tokens into a single string. 
- if let Some(TestToken::Character(s)) = actual_tokens.last_mut() { - s.push(c); - } else { - actual_tokens.push(TestToken::Character(c.into())); - } - } - Token::Comment(comment) => actual_tokens.push(TestToken::Comment(comment)), - Token::Doctype(doctype) => actual_tokens.push(TestToken::Doctype { - name: doctype.name, - public_id: doctype.public_id, - system_id: doctype.system_id, - force_quirks: doctype.force_quirks, - }), - Token::EndOfFile => {} - }; - } - - assert_eq!( - Output { - errors: tokenizer - .emitter_mut() - .drain_errors() - .map(|(e, _)| TestError { - code: e.code().to_string() - }) - .collect(), - tokens: actual_tokens, - }, - test.output, - ); -} - -trait DrainErrors<O> { - fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_>; -} - -impl<O> DrainErrors<O> for BasicEmitter<O> { - fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_> { - Box::new(self.drain_errors()) - } -} - -impl DrainErrors<usize> for TracingEmitter { - fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<usize>)> + '_> { - Box::new(self.drain_errors()) - } -} |