author    Martin Fischer <martin@push-f.com>    2023-09-02 09:30:44 +0200
committer Martin Fischer <martin@push-f.com>    2023-09-03 23:00:05 +0200
commit    3eaa8598b5749e5d7554a223ef2079ebdb778730 (patch)
tree      53e96735b7f5ab15c9480b03ba817faf6bc2b1b9
parent    cfd065ac32678486376a1c3cb47e2262708d1aa7 (diff)
refactor: make span tests tokenizer-independent
-rw-r--r--  tests/test_spans.rs  225
1 file changed, 145 insertions(+), 80 deletions(-)
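
This commit routes every span test through a new `test_and_annotate` helper: instead of driving `tokenizer()` directly, each test now supplies a labeler closure that turns a token stream into labeled spans, so the helper alone decides how the tokenizer is constructed. A minimal sketch of the resulting test shape, reusing the `Token`, `TokenIter`, and `test_and_annotate` items introduced in the diff below:

    let html = "<x> <xyz>";
    let labeler = |tokens: TokenIter| {
        let mut labels = Vec::new();
        for token in tokens {
            if let Token::StartTag(tag) = token {
                // Label the whole start tag with an empty message.
                labels.push((tag.span, ""));
            }
        }
        labels
    };
    // Runs the tokenizer, applies the labeler, and renders the annotations.
    let annotated = test_and_annotate(html, labeler);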
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
index 3a195ad..db17328 100644
--- a/tests/test_spans.rs
+++ b/tests/test_spans.rs
@@ -14,6 +14,21 @@ fn tokenizer(html: &'static str) -> impl Iterator<Item = Token<usize>> {
NaiveParser::new(PosTrackingReader::new(html)).flatten()
}
+/// A convenience alias for labeler closures that call `tokens.next()`, since
+/// Rust apparently cannot infer the closure parameter type there without an
+/// explicit annotation.
+type TokenIter = Box<dyn Iterator<Item = Token<usize>>>;
+
+fn test_and_annotate<S: AsRef<str> + Clone>(
+ html: &'static str,
+ labeler: impl Fn(TokenIter) -> Vec<(Range<usize>, S)>,
+) -> String {
+ let labels = labeler(Box::new(tokenizer(html)));
+
+ // TODO: assert character encoding independence here once all tests support it
+
+ annotate(html, labels)
+}
+
fn annotate(html: &str, labels: Vec<(Range<usize>, impl AsRef<str>)>) -> String {
let mut files = SimpleFiles::new();
let file_id = files.add("test.html", html);
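
The TODO above anticipates re-running each labeler over the same document under different character encodings and requiring identical spans. Purely as a hypothetical sketch of what that assertion could compare (the helper and its name are assumptions, not part of this commit):

    use std::ops::Range;

    // Hypothetical: compare only the span halves of two labeler runs,
    // e.g. one over a UTF-8 reader and one over some other decoding.
    fn assert_same_spans<S: AsRef<str>>(a: &[(Range<usize>, S)], b: &[(Range<usize>, S)]) {
        let spans = |v: &[(Range<usize>, S)]| -> Vec<Range<usize>> {
            v.iter().map(|(span, _)| span.clone()).collect()
        };
        assert_eq!(spans(a), spans(b));
    }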
@@ -42,13 +57,16 @@ fn annotate(html: &str, labels: Vec<(Range<usize>, impl AsRef<str>)>) -> String
#[test]
fn start_tag_span() {
let html = "<x> <xyz> <xyz > <xyz/>";
- let mut labels = Vec::new();
- for token in tokenizer(html) {
- if let Token::StartTag(tag) = token {
- labels.push((tag.span, ""));
+ let labeler = |tokens| {
+ let mut labels = Vec::new();
+ for token in tokens {
+ if let Token::StartTag(tag) = token {
+ labels.push((tag.span, ""));
+ }
}
- }
- assert_snapshot!(annotate(html, labels), @r###"
+ labels
+ };
+ assert_snapshot!(test_and_annotate(html, labeler), @r###"
<x> <xyz> <xyz > <xyz/>
^^^ ^^^^^ ^^^^^^^ ^^^^^^
"###);
@@ -57,13 +75,16 @@ fn start_tag_span() {
#[test]
fn end_tag_span() {
let html = "</x> </xyz> </xyz > </xyz/>";
- let mut labels = Vec::new();
- for token in tokenizer(html) {
- if let Token::EndTag(tag) = token {
- labels.push((tag.span, ""));
+ let labeler = |tokens| {
+ let mut labels = Vec::new();
+ for token in tokens {
+ if let Token::EndTag(tag) = token {
+ labels.push((tag.span, ""));
+ }
}
- }
- assert_snapshot!(annotate(html, labels), @r###"
+ labels
+ };
+ assert_snapshot!(test_and_annotate(html, labeler), @r###"
</x> </xyz> </xyz > </xyz/>
^^^^ ^^^^^^ ^^^^^^^^ ^^^^^^^
"###);
@@ -72,13 +93,16 @@ fn end_tag_span() {
#[test]
fn start_tag_name_span() {
let html = "<x> <xyz> <xyz > <xyz/>";
- let mut labels = Vec::new();
- for token in tokenizer(html) {
- if let Token::StartTag(tag) = token {
- labels.push((tag.name_span(), ""));
+ let labeler = |tokens| {
+ let mut labels = Vec::new();
+ for token in tokens {
+ if let Token::StartTag(tag) = token {
+ labels.push((tag.name_span(), ""));
+ }
}
- }
- assert_snapshot!(annotate(html, labels), @r###"
+ labels
+ };
+ assert_snapshot!(test_and_annotate(html, labeler), @r###"
<x> <xyz> <xyz > <xyz/>
^ ^^^ ^^^ ^^^
"###);
@@ -87,13 +111,16 @@ fn start_tag_name_span() {
#[test]
fn end_tag_name_span() {
let html = "</x> </xyz> </xyz > </xyz/>";
- let mut labels = Vec::new();
- for token in tokenizer(html) {
- if let Token::EndTag(tag) = token {
- labels.push((tag.name_span(), ""));
+ let labeler = |tokens| {
+ let mut labels = Vec::new();
+ for token in tokens {
+ if let Token::EndTag(tag) = token {
+ labels.push((tag.name_span(), ""));
+ }
}
- }
- assert_snapshot!(annotate(html, labels), @r###"
+ labels
+ };
+ assert_snapshot!(test_and_annotate(html, labeler), @r###"
</x> </xyz> </xyz > </xyz/>
^ ^^^ ^^^ ^^^
"###);
@@ -102,14 +129,17 @@ fn end_tag_name_span() {
#[test]
fn attribute_name_span() {
let html = "<test x xyz y=VAL xy=VAL z = VAL yzx = VAL>";
- let mut labels = Vec::new();
- let Token::StartTag(tag) = tokenizer(html).next().unwrap() else {
- panic!("expected start tag")
+ let labeler = |mut tokens: TokenIter| {
+ let mut labels = Vec::new();
+ let Token::StartTag(tag) = tokens.next().unwrap() else {
+ panic!("expected start tag")
+ };
+ for attr in &tag.attributes {
+ labels.push((attr.name_span(), ""));
+ }
+ labels
};
- for attr in &tag.attributes {
- labels.push((attr.name_span(), ""));
- }
- assert_snapshot!(annotate(html, labels), @r###"
+ assert_snapshot!(test_and_annotate(html, labeler), @r###"
<test x xyz y=VAL xy=VAL z = VAL yzx = VAL>
^ ^^^ ^ ^^ ^ ^^^
"###);
@@ -118,14 +148,17 @@ fn attribute_name_span() {
#[test]
fn attribute_value_span() {
let html = "<test x=unquoted y = unquoted z='single-quoted' zz=\"double-quoted\" empty=''>";
- let mut labels = Vec::new();
- let Token::StartTag(tag) = tokenizer(html).next().unwrap() else {
- panic!("expected start tag")
+ let labeler = |mut tokens: TokenIter| {
+ let mut labels = Vec::new();
+ let Token::StartTag(tag) = tokens.next().unwrap() else {
+ panic!("expected start tag")
+ };
+ for attr in &tag.attributes {
+ labels.push((attr.value_span().unwrap(), ""));
+ }
+ labels
};
- for attr in &tag.attributes {
- labels.push((attr.value_span().unwrap(), ""));
- }
- assert_snapshot!(annotate(html, labels), @r###"
+ assert_snapshot!(test_and_annotate(html, labeler), @r###"
<test x=unquoted y = unquoted z='single-quoted' zz="double-quoted" empty=''>
^^^^^^^^ ^^^^^^^^ ^^^^^^^^^^^^^ ^^^^^^^^^^^^^ ^
"###);
@@ -134,14 +167,17 @@ fn attribute_value_span() {
#[test]
fn attribute_value_with_char_ref() {
let html = "<test x=&amp; y='&amp;' z=\"&amp;\">";
- let mut labels = Vec::new();
- let Token::StartTag(tag) = tokenizer(html).next().unwrap() else {
- panic!("expected start tag")
+ let labeler = |mut tokens: TokenIter| {
+ let mut labels = Vec::new();
+ let Token::StartTag(tag) = tokens.next().unwrap() else {
+ panic!("expected start tag")
+ };
+ for attr in &tag.attributes {
+ labels.push((attr.value_span().unwrap(), ""));
+ }
+ labels
};
- for attr in &tag.attributes {
- labels.push((attr.value_span().unwrap(), ""));
- }
- assert_snapshot!(annotate(html, labels), @r###"
+ assert_snapshot!(test_and_annotate(html, labeler), @r###"
<test x=&amp; y='&amp;' z="&amp;">
^^^^^ ^^^^^ ^^^^^
"###);
@@ -159,15 +195,17 @@ fn comment_data_span() {
let mut annotated = String::new();
for case in cases {
- let Token::Comment(comment) = tokenizer(case)
- .filter(|t| !matches!(t, Token::Error { .. }))
- .next()
- .unwrap()
- else {
- panic!("expected comment");
+ let labeler = |tokens: TokenIter| {
+ let Token::Comment(comment) = tokens
+ .filter(|t| !matches!(t, Token::Error { .. }))
+ .next()
+ .unwrap()
+ else {
+ panic!("expected comment");
+ };
+ vec![(comment.data_span(), "")]
};
- assert_eq!(case[comment.data_span()], comment.data);
- annotated.push_str(&annotate(case, vec![(comment.data_span(), "")]));
+ annotated.push_str(&test_and_annotate(case, labeler));
}
assert_snapshot!(annotated, @r###"
@@ -176,6 +214,17 @@ fn comment_data_span() {
<! Why are you looking at the source code? -->
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
"###);
+
+ for (idx, case) in cases.iter().enumerate() {
+ let Token::Comment(comment) = tokenizer(case)
+ .filter(|t| !matches!(t, Token::Error { .. }))
+ .next()
+ .unwrap()
+ else {
+ panic!("expected comment");
+ };
+ assert_eq!(case[comment.data_span()], comment.data, "case {idx}");
+ }
}
#[test]
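
Note the design choice in the hunk above: the labeler only returns labels for annotation, so the old inline `assert_eq!` comparing `comment.data` against the source slice moves into a separate loop, with the case index added to the panic message. The invariant that loop enforces, as a standalone hedged sketch (helper name is hypothetical):

    use std::ops::Range;

    /// Slicing the source by a token's span must reproduce the token's text.
    fn assert_span_roundtrip(source: &str, span: Range<usize>, expected: &str, idx: usize) {
        assert_eq!(&source[span], expected, "case {idx}");
    }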
@@ -187,14 +236,17 @@ fn doctype_span() {
let mut annotated = String::new();
for case in cases {
- let Token::Doctype(doctype) = tokenizer(case)
- .filter(|t| !matches!(t, Token::Error { .. }))
- .next()
- .unwrap()
- else {
- panic!("expected doctype");
+ let labeler = |tokens: TokenIter| {
+ let Token::Doctype(doctype) = tokens
+ .filter(|t| !matches!(t, Token::Error { .. }))
+ .next()
+ .unwrap()
+ else {
+ panic!("expected doctype");
+ };
+ vec![(doctype.span, "")]
};
- annotated.push_str(&annotate(case, vec![(doctype.span, "")]));
+ annotated.push_str(&test_and_annotate(case, labeler));
}
assert_snapshot!(annotated, @r###"
@@ -212,22 +264,26 @@ fn doctype_id_spans() {
let mut annotated = String::new();
for case in cases {
- let Token::Doctype(doctype) = tokenizer(case)
- .filter(|t| !matches!(t, Token::Error { .. }))
- .next()
- .unwrap()
- else {
- panic!("expected doctype");
+ let labeler = |tokens: TokenIter| {
+ let Token::Doctype(doctype) = tokens
+ .filter(|t| !matches!(t, Token::Error { .. }))
+ .next()
+ .unwrap()
+ else {
+ panic!("expected doctype");
+ };
+
+ let mut labels = Vec::new();
+ if let Some(public_id_span) = doctype.public_id_span() {
+ labels.push((public_id_span, "public id"));
+ }
+ if let Some(system_id_span) = doctype.system_id_span() {
+ labels.push((system_id_span, "system id"));
+ }
+ labels
};
- let mut labels = Vec::new();
- if let Some(public_id_span) = doctype.public_id_span() {
- labels.push((public_id_span, "public id"));
- }
- if let Some(system_id_span) = doctype.system_id_span() {
- labels.push((system_id_span, "system id"));
- }
- annotated.push_str(&annotate(case, labels));
+ annotated.push_str(&test_and_annotate(case, labeler));
}
assert_snapshot!(annotated, @r###"
@@ -239,12 +295,10 @@ fn doctype_id_spans() {
}
fn annotate_errors(html: &'static str) -> String {
- let mut labels = Vec::new();
for token in tokenizer(html) {
- let Token::Error { error, span } = token else {
+ let Token::Error { span, .. } = token else {
continue;
};
-
if span.start == span.end {
if span.start != html.len() {
panic!("empty error spans are only allowed at the very end of the source (for eof errors)");
@@ -253,10 +307,21 @@ fn annotate_errors(html: &'static str) -> String {
assert!(span.start < span.end);
assert!(span.end <= html.len());
}
-
- labels.push((span, error.code()));
}
- annotate(html, labels)
+
+ let labeler = |tokens| {
+ let mut labels = Vec::new();
+ for token in tokens {
+ let Token::Error { error, span } = token else {
+ continue;
+ };
+
+ labels.push((span, error.code()));
+ }
+ labels
+ };
+
+ test_and_annotate(html, labeler)
}
#[test]
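
`annotate_errors` now validates error spans in a first pass and only then delegates labeling to `test_and_annotate`. The validity rule it enforces, condensed into a standalone sketch (function name is an assumption, the checks are taken verbatim from the test):

    use std::ops::Range;

    /// An empty error span is only allowed at the very end of the source
    /// (for eof errors); any other span must be non-empty and in bounds.
    fn check_error_span(span: &Range<usize>, source_len: usize) {
        if span.start == span.end {
            assert_eq!(span.start, source_len);
        } else {
            assert!(span.start < span.end);
            assert!(span.end <= source_len);
        }
    }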