| author | Martin Fischer <martin@push-f.com> | 2023-09-02 09:30:44 +0200 |
|---|---|---|
| committer | Martin Fischer <martin@push-f.com> | 2023-09-03 23:00:05 +0200 |
| commit | 3eaa8598b5749e5d7554a223ef2079ebdb778730 (patch) | |
| tree | 53e96735b7f5ab15c9480b03ba817faf6bc2b1b9 /tests/test_spans.rs | |
| parent | cfd065ac32678486376a1c3cb47e2262708d1aa7 (diff) | |
refactor: make span tests tokenizer-independent
Diffstat (limited to 'tests/test_spans.rs')
| -rw-r--r-- | tests/test_spans.rs | 225 |
1 file changed, 145 insertions, 80 deletions
```diff
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
index 3a195ad..db17328 100644
--- a/tests/test_spans.rs
+++ b/tests/test_spans.rs
@@ -14,6 +14,21 @@ fn tokenizer(html: &'static str) -> impl Iterator<Item = Token<usize>> {
     NaiveParser::new(PosTrackingReader::new(html)).flatten()
 }
 
+/// Just a convenient type alias for labeler closures calling `tokens.next()`
+/// since Rust apparently cannot infer the type (requiring an annotation).
+type TokenIter = Box<dyn Iterator<Item = Token<usize>>>;
+
+fn test_and_annotate<S: AsRef<str> + Clone>(
+    html: &'static str,
+    labeler: impl Fn(TokenIter) -> Vec<(Range<usize>, S)>,
+) -> String {
+    let labels = labeler(Box::new(tokenizer(html)));
+
+    // TODO: assert character encoding independence here once all tests support it
+
+    annotate(html, labels)
+}
+
 fn annotate(html: &str, labels: Vec<(Range<usize>, impl AsRef<str>)>) -> String {
     let mut files = SimpleFiles::new();
     let file_id = files.add("test.html", html);
@@ -42,13 +57,16 @@ fn annotate(html: &str, labels: Vec<(Range<usize>, impl AsRef<str>)>) -> String
 #[test]
 fn start_tag_span() {
     let html = "<x> <xyz> <xyz  > <xyz/>";
-    let mut labels = Vec::new();
-    for token in tokenizer(html) {
-        if let Token::StartTag(tag) = token {
-            labels.push((tag.span, ""));
+    let labeler = |tokens| {
+        let mut labels = Vec::new();
+        for token in tokens {
+            if let Token::StartTag(tag) = token {
+                labels.push((tag.span, ""));
+            }
         }
-    }
-    assert_snapshot!(annotate(html, labels), @r###"
+        labels
+    };
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
     <x> <xyz> <xyz  > <xyz/>
     ^^^ ^^^^^ ^^^^^^^ ^^^^^^
     "###);
@@ -57,13 +75,16 @@ fn start_tag_span() {
 #[test]
 fn end_tag_span() {
     let html = "</x> </xyz> </xyz  > </xyz/>";
-    let mut labels = Vec::new();
-    for token in tokenizer(html) {
-        if let Token::EndTag(tag) = token {
-            labels.push((tag.span, ""));
+    let labeler = |tokens| {
+        let mut labels = Vec::new();
+        for token in tokens {
+            if let Token::EndTag(tag) = token {
+                labels.push((tag.span, ""));
+            }
         }
-    }
-    assert_snapshot!(annotate(html, labels), @r###"
+        labels
+    };
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
     </x> </xyz> </xyz  > </xyz/>
     ^^^^ ^^^^^^ ^^^^^^^^ ^^^^^^^
     "###);
@@ -72,13 +93,16 @@ fn end_tag_span() {
 #[test]
 fn start_tag_name_span() {
     let html = "<x> <xyz> <xyz  > <xyz/>";
-    let mut labels = Vec::new();
-    for token in tokenizer(html) {
-        if let Token::StartTag(tag) = token {
-            labels.push((tag.name_span(), ""));
+    let labeler = |tokens| {
+        let mut labels = Vec::new();
+        for token in tokens {
+            if let Token::StartTag(tag) = token {
+                labels.push((tag.name_span(), ""));
+            }
         }
-    }
-    assert_snapshot!(annotate(html, labels), @r###"
+        labels
+    };
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
     <x> <xyz> <xyz  > <xyz/>
      ^   ^^^   ^^^     ^^^
     "###);
@@ -87,13 +111,16 @@ fn start_tag_name_span() {
 #[test]
 fn end_tag_name_span() {
     let html = "</x> </xyz> </xyz  > </xyz/>";
-    let mut labels = Vec::new();
-    for token in tokenizer(html) {
-        if let Token::EndTag(tag) = token {
-            labels.push((tag.name_span(), ""));
+    let labeler = |tokens| {
+        let mut labels = Vec::new();
+        for token in tokens {
+            if let Token::EndTag(tag) = token {
+                labels.push((tag.name_span(), ""));
+            }
         }
-    }
-    assert_snapshot!(annotate(html, labels), @r###"
+        labels
+    };
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
     </x> </xyz> </xyz  > </xyz/>
       ^    ^^^    ^^^      ^^^
     "###);
@@ -102,14 +129,17 @@ fn end_tag_name_span() {
 #[test]
 fn attribute_name_span() {
     let html = "<test x xyz y=VAL xy=VAL z = VAL yzx = VAL>";
-    let mut labels = Vec::new();
-    let Token::StartTag(tag) = tokenizer(html).next().unwrap() else {
-        panic!("expected start tag")
+    let labeler = |mut tokens: TokenIter| {
+        let mut labels = Vec::new();
+        let Token::StartTag(tag) = tokens.next().unwrap() else {
+            panic!("expected start tag")
+        };
+        for attr in &tag.attributes {
+            labels.push((attr.name_span(), ""));
+        }
+        labels
     };
-    for attr in &tag.attributes {
-        labels.push((attr.name_span(), ""));
-    }
-    assert_snapshot!(annotate(html, labels), @r###"
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
     <test x xyz y=VAL xy=VAL z = VAL yzx = VAL>
           ^ ^^^ ^     ^^     ^       ^^^
     "###);
@@ -118,14 +148,17 @@ fn attribute_name_span() {
 #[test]
 fn attribute_value_span() {
     let html = "<test x=unquoted y = unquoted z='single-quoted' zz=\"double-quoted\" empty=''>";
-    let mut labels = Vec::new();
-    let Token::StartTag(tag) = tokenizer(html).next().unwrap() else {
-        panic!("expected start tag")
+    let labeler = |mut tokens: TokenIter| {
+        let mut labels = Vec::new();
+        let Token::StartTag(tag) = tokens.next().unwrap() else {
+            panic!("expected start tag")
+        };
+        for attr in &tag.attributes {
+            labels.push((attr.value_span().unwrap(), ""));
+        }
+        labels
     };
-    for attr in &tag.attributes {
-        labels.push((attr.value_span().unwrap(), ""));
-    }
-    assert_snapshot!(annotate(html, labels), @r###"
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
     <test x=unquoted y = unquoted z='single-quoted' zz="double-quoted" empty=''>
             ^^^^^^^^     ^^^^^^^^    ^^^^^^^^^^^^^      ^^^^^^^^^^^^^         ^
     "###);
@@ -134,14 +167,17 @@ fn attribute_value_span() {
 #[test]
 fn attribute_value_with_char_ref() {
     let html = "<test x=&amp; y='&amp;' z=\"&amp;\">";
-    let mut labels = Vec::new();
-    let Token::StartTag(tag) = tokenizer(html).next().unwrap() else {
-        panic!("expected start tag")
+    let labeler = |mut tokens: TokenIter| {
+        let mut labels = Vec::new();
+        let Token::StartTag(tag) = tokens.next().unwrap() else {
+            panic!("expected start tag")
+        };
+        for attr in &tag.attributes {
+            labels.push((attr.value_span().unwrap(), ""));
+        }
+        labels
     };
-    for attr in &tag.attributes {
-        labels.push((attr.value_span().unwrap(), ""));
-    }
-    assert_snapshot!(annotate(html, labels), @r###"
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
     <test x=&amp; y='&amp;' z="&amp;">
             ^^^^^    ^^^^^     ^^^^^
     "###);
@@ -159,15 +195,17 @@ fn comment_data_span() {
 
     let mut annotated = String::new();
     for case in cases {
-        let Token::Comment(comment) = tokenizer(case)
-            .filter(|t| !matches!(t, Token::Error { .. }))
-            .next()
-            .unwrap()
-        else {
-            panic!("expected comment");
+        let labeler = |tokens: TokenIter| {
+            let Token::Comment(comment) = tokens
+                .filter(|t| !matches!(t, Token::Error { .. }))
+                .next()
+                .unwrap()
+            else {
+                panic!("expected comment");
+            };
+            vec![(comment.data_span(), "")]
         };
-        assert_eq!(case[comment.data_span()], comment.data);
-        annotated.push_str(&annotate(case, vec![(comment.data_span(), "")]));
+        annotated.push_str(&test_and_annotate(case, labeler));
     }
 
     assert_snapshot!(annotated, @r###"
@@ -176,6 +214,17 @@ fn comment_data_span() {
     <! Why are you looking at the source code? -->
       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
     "###);
+
+    for (idx, case) in cases.iter().enumerate() {
+        let Token::Comment(comment) = tokenizer(case)
+            .filter(|t| !matches!(t, Token::Error { .. }))
+            .next()
+            .unwrap()
+        else {
+            panic!("expected comment");
+        };
+        assert_eq!(case[comment.data_span()], comment.data, "case {idx}");
+    }
 }
 
 #[test]
@@ -187,14 +236,17 @@ fn doctype_span() {
 
     let mut annotated = String::new();
     for case in cases {
-        let Token::Doctype(doctype) = tokenizer(case)
-            .filter(|t| !matches!(t, Token::Error { .. }))
-            .next()
-            .unwrap()
-        else {
-            panic!("expected doctype");
+        let labeler = |tokens: TokenIter| {
+            let Token::Doctype(doctype) = tokens
+                .filter(|t| !matches!(t, Token::Error { .. }))
+                .next()
+                .unwrap()
+            else {
+                panic!("expected doctype");
+            };
+            vec![(doctype.span, "")]
         };
-        annotated.push_str(&annotate(case, vec![(doctype.span, "")]));
+        annotated.push_str(&test_and_annotate(case, labeler));
    }
 
     assert_snapshot!(annotated, @r###"
@@ -212,22 +264,26 @@ fn doctype_id_spans() {
 
     let mut annotated = String::new();
     for case in cases {
-        let Token::Doctype(doctype) = tokenizer(case)
-            .filter(|t| !matches!(t, Token::Error { .. }))
-            .next()
-            .unwrap()
-        else {
-            panic!("expected doctype");
+        let labeler = |tokens: TokenIter| {
+            let Token::Doctype(doctype) = tokens
+                .filter(|t| !matches!(t, Token::Error { .. }))
+                .next()
+                .unwrap()
+            else {
+                panic!("expected doctype");
+            };
+
+            let mut labels = Vec::new();
+            if let Some(public_id_span) = doctype.public_id_span() {
+                labels.push((public_id_span, "public id"));
+            }
+            if let Some(system_id_span) = doctype.system_id_span() {
+                labels.push((system_id_span, "system id"));
+            }
+
+            labels
         };
-        let mut labels = Vec::new();
-        if let Some(public_id_span) = doctype.public_id_span() {
-            labels.push((public_id_span, "public id"));
-        }
-        if let Some(system_id_span) = doctype.system_id_span() {
-            labels.push((system_id_span, "system id"));
-        }
-
-        annotated.push_str(&annotate(case, labels));
+        annotated.push_str(&test_and_annotate(case, labeler));
     }
 
     assert_snapshot!(annotated, @r###"
@@ -239,12 +295,10 @@ fn doctype_id_spans() {
 }
 
 fn annotate_errors(html: &'static str) -> String {
-    let mut labels = Vec::new();
     for token in tokenizer(html) {
-        let Token::Error { error, span } = token else {
+        let Token::Error { span, .. } = token else {
             continue;
         };
-
         if span.start == span.end {
             if span.start != html.len() {
                 panic!("empty error spans are only allowed at the very end of the source (for eof errors)");
@@ -253,10 +307,21 @@ fn annotate_errors(html: &'static str) -> String {
             assert!(span.start < span.end);
             assert!(span.end <= html.len());
         }
-
-        labels.push((span, error.code()));
     }
-    annotate(html, labels)
+
+    let labeler = |tokens| {
+        let mut labels = Vec::new();
+        for token in tokens {
+            let Token::Error { error, span } = token else {
+                continue;
+            };
+
+            labels.push((span, error.code()));
+        }
+        labels
+    };
+
+    test_and_annotate(html, labeler)
 }
 
 #[test]
```
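The shape of the refactor, in miniature: each test used to walk `tokenizer(html)` itself, so every test was wired to one particular way of constructing the token stream. After this commit a test only supplies a labeler closure that turns tokens into labeled spans, while `test_and_annotate` owns tokenization, giving it a single place to later re-run the labeler against differently encoded inputs (the TODO in the diff). The sketch below is self-contained and hypothetical: `Token`, `tokenizer`, and `annotate` here are simplified stand-ins for the crate's real `Token<usize>`, its `NaiveParser`/`PosTrackingReader` pipeline, and the `SimpleFiles`-based `annotate`, not the actual API.

```rust
use std::ops::Range;

/// Stand-in token type (the real tests use the crate's `Token<usize>`).
enum Token {
    StartTag { span: Range<usize> },
    Other,
}

/// Boxed-iterator alias mirroring the `TokenIter` alias from the diff:
/// boxing gives every labeler closure the same nameable argument type.
type TokenIter = Box<dyn Iterator<Item = Token>>;

/// Stand-in tokenizer: emits a three-byte `StartTag` for every '<' and an
/// `Other` token for every '>' (a toy, only valid for inputs like "<x> <y>").
fn tokenizer(html: &'static str) -> impl Iterator<Item = Token> {
    html.char_indices().filter_map(|(i, c)| match c {
        '<' => Some(Token::StartTag { span: i..i + 3 }),
        '>' => Some(Token::Other),
        _ => None,
    })
}

/// Stand-in for the tests' codespan-based `annotate`: carets under each span.
fn annotate<S: AsRef<str>>(html: &str, labels: Vec<(Range<usize>, S)>) -> String {
    let mut carets = vec![b' '; html.len()];
    for (span, _label) in labels {
        carets[span].fill(b'^');
    }
    format!("{html}\n{}\n", String::from_utf8(carets).unwrap())
}

/// The helper owns tokenization; a test only says *which* spans it labels.
/// This is the one place where a future version could re-tokenize the same
/// input under other character encodings and assert the spans still agree.
fn test_and_annotate<S: AsRef<str>>(
    html: &'static str,
    labeler: impl Fn(TokenIter) -> Vec<(Range<usize>, S)>,
) -> String {
    let labels = labeler(Box::new(tokenizer(html)));
    annotate(html, labels)
}

fn main() {
    // A labeler in the style of the refactored tests: label all start tags.
    let labeler = |tokens: TokenIter| -> Vec<(Range<usize>, &'static str)> {
        tokens
            .filter_map(|token| match token {
                Token::StartTag { span } => Some((span, "")),
                Token::Other => None,
            })
            .collect()
    };
    // Prints:
    // <x> <y>
    // ^^^ ^^^
    print!("{}", test_and_annotate("<x> <y>", labeler));
}
```

The boxing behind `TokenIter` is what lets `test_and_annotate` accept every labeler through a single `Fn(TokenIter) -> Vec<...>` bound: without type erasure, each call site would pin the helper to one concrete iterator type, which is exactly the tokenizer-dependence the commit removes.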