diff options
author | Martin Fischer <martin@push-f.com> | 2023-09-04 09:27:30 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-09-28 07:26:44 +0200 |
commit | 82961e1f8c85d6961410fd007ca2127cee594848 (patch) | |
tree | 8e630ae5e009c771e734178d7f7cd4db7e946aa5 | |
parent | 1e746e68f647eb42586ffab81fefa9f68723ed97 (diff) |
refactor: make labeler closures take NaiveParser
The commit after next will move errors out of the Token enum
but we still want to be able to test that the spans of errors
are character encoding independent.
-rw-r--r-- | tests/test_spans.rs | 81 |
1 file changed, 46 insertions, 35 deletions
diff --git a/tests/test_spans.rs b/tests/test_spans.rs index 4de0714..f2cdc5f 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -1,3 +1,4 @@ +use std::convert::Infallible; use std::ops::Range; use codespan_reporting::{ @@ -14,22 +15,28 @@ use html5tokenizer::{ use insta::assert_snapshot; use similar_asserts::assert_eq; -fn parser<R>(reader: impl IntoReader<'static, Reader = R>) -> impl Iterator<Item = Token<usize>> +/// Just a convenient type alias for labeler closures since Rust +/// apparently cannot infer the type (requiring an annotation). +type Parser = NaiveParser< + PosTrackingReader<Box<dyn Reader<Error = Infallible>>>, + usize, + html5tokenizer::DefaultEmitter<usize>, +>; + +fn parser<R>(reader: impl IntoReader<'static, Reader = R>) -> Parser where - R: Reader, + R: Reader<Error = Infallible> + 'static, { - NaiveParser::new(PosTrackingReader::new(reader)).flatten() + NaiveParser::new(PosTrackingReader::new( + Box::new(reader.into_reader()) as Box<dyn Reader<Error = Infallible>> + )) } -/// Just a convenient type alias for labeler closures calling `tokens.next()` -/// since Rust apparently cannot infer the type (requiring an annotation). 
-type TokenIter = Box<dyn Iterator<Item = Token<usize>>>; - fn test_and_annotate<S: AsRef<str> + Clone>( html: &'static str, - labeler: impl Fn(TokenIter) -> Vec<(Range<usize>, S)>, + labeler: impl Fn(Parser) -> Vec<(Range<usize>, S)>, ) -> String { - let labels = labeler(Box::new(parser(html))); + let labels = labeler(parser(html)); assert_char_encoding_independence(html, labeler); @@ -64,9 +71,9 @@ fn annotate(html: &str, labels: Vec<(Range<usize>, impl AsRef<str>)>) -> String #[test] fn start_tag_span() { let html = "<x> <xyz> <xyz > <xyz/>"; - let labeler = |tokens| { + let labeler = |parser: Parser| { let mut labels = Vec::new(); - for token in tokens { + for token in parser.flatten() { if let Token::StartTag(tag) = token { labels.push((tag.span, "")); } @@ -82,9 +89,9 @@ fn start_tag_span() { #[test] fn end_tag_span() { let html = "</x> </xyz> </xyz > </xyz/>"; - let labeler = |tokens| { + let labeler = |parser: Parser| { let mut labels = Vec::new(); - for token in tokens { + for token in parser.flatten() { if let Token::EndTag(tag) = token { labels.push((tag.span, "")); } @@ -100,9 +107,9 @@ fn end_tag_span() { #[test] fn start_tag_name_span() { let html = "<x> <xyz> <xyz > <xyz/>"; - let labeler = |tokens| { + let labeler = |parser: Parser| { let mut labels = Vec::new(); - for token in tokens { + for token in parser.flatten() { if let Token::StartTag(tag) = token { labels.push((tag.name_span, "")); } @@ -118,9 +125,9 @@ fn start_tag_name_span() { #[test] fn end_tag_name_span() { let html = "</x> </xyz> </xyz > </xyz/>"; - let labeler = |tokens| { + let labeler = |parser: Parser| { let mut labels = Vec::new(); - for token in tokens { + for token in parser.flatten() { if let Token::EndTag(tag) = token { labels.push((tag.name_span, "")); } @@ -136,9 +143,9 @@ fn end_tag_name_span() { #[test] fn attribute_name_span() { let html = "<test x xyz y=VAL xy=VAL z = VAL yzx = VAL>"; - let labeler = |mut tokens: TokenIter| { + let labeler = |parser: Parser| { let mut 
labels = Vec::new(); - let Token::StartTag(tag) = tokens.next().unwrap() else { + let Token::StartTag(tag) = parser.flatten().next().unwrap() else { panic!("expected start tag") }; for attr in &tag.attributes { @@ -155,9 +162,9 @@ fn attribute_name_span() { #[test] fn attribute_value_span() { let html = "<test x=unquoted y = unquoted z='single-quoted' zz=\"double-quoted\" empty=''>"; - let labeler = |mut tokens: TokenIter| { + let labeler = |parser: Parser| { let mut labels = Vec::new(); - let Token::StartTag(tag) = tokens.next().unwrap() else { + let Token::StartTag(tag) = parser.flatten().next().unwrap() else { panic!("expected start tag") }; for attr in &tag.attributes { @@ -174,9 +181,9 @@ fn attribute_value_span() { #[test] fn attribute_value_with_char_ref() { let html = "<test x=& y='&' z=\"&\">"; - let labeler = |mut tokens: TokenIter| { + let labeler = |parser: Parser| { let mut labels = Vec::new(); - let Token::StartTag(tag) = tokens.next().unwrap() else { + let Token::StartTag(tag) = parser.flatten().next().unwrap() else { panic!("expected start tag") }; for attr in &tag.attributes { @@ -213,8 +220,9 @@ fn comment_data_span() { let mut annotated = String::new(); for case in cases { - let labeler = |tokens: TokenIter| { - let Token::Comment(comment) = tokens + let labeler = |parser: Parser| { + let Token::Comment(comment) = parser + .flatten() .filter(|t| !matches!(t, Token::Error { .. })) .next() .unwrap() @@ -258,6 +266,7 @@ fn comment_data_span() { for (idx, case) in cases.iter().enumerate() { let Token::Comment(comment) = parser(*case) + .flatten() .filter(|t| !matches!(t, Token::Error { .. })) .next() .unwrap() @@ -277,8 +286,9 @@ fn doctype_span() { let mut annotated = String::new(); for case in cases { - let labeler = |tokens: TokenIter| { - let Token::Doctype(doctype) = tokens + let labeler = |parser: Parser| { + let Token::Doctype(doctype) = parser + .flatten() .filter(|t| !matches!(t, Token::Error { .. 
})) .next() .unwrap() @@ -305,8 +315,9 @@ fn doctype_id_spans() { let mut annotated = String::new(); for case in cases { - let labeler = |tokens: TokenIter| { - let Token::Doctype(doctype) = tokens + let labeler = |parser: Parser| { + let Token::Doctype(doctype) = parser + .flatten() .filter(|t| !matches!(t, Token::Error { .. })) .next() .unwrap() @@ -340,7 +351,7 @@ fn doctype_id_spans() { } fn annotate_errors(html: &'static str) -> String { - for token in parser(html) { + for token in parser(html).flatten() { let Token::Error { span, .. } = token else { continue; }; @@ -354,9 +365,9 @@ fn annotate_errors(html: &'static str) -> String { } } - let labeler = |tokens| { + let labeler = |parser: Parser| { let mut labels = Vec::new(); - for token in tokens { + for token in parser.flatten() { let Token::Error { error, span } = token else { continue; }; @@ -540,10 +551,10 @@ fn error_invalid_first_character_of_tag_name() { fn assert_char_encoding_independence<S: AsRef<str> + Clone>( html: &'static str, - labeler: impl Fn(TokenIter) -> Vec<(Range<usize>, S)>, + labeler: impl Fn(Parser) -> Vec<(Range<usize>, S)>, ) { - let utf8_labels = labeler(Box::new(parser(html))); - let utf16_labels = labeler(Box::new(parser(Utf16Reader(html.into_reader())))); + let utf8_labels = labeler(parser(html)); + let utf16_labels = labeler(parser(Utf16Reader(html.into_reader()))); for (idx, (span, _)) in utf16_labels.into_iter().enumerate() { let expected_utf16_span = Range { |