about summary refs log tree commit diff
path: root/tests/test_spans.rs
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2023-09-27 09:25:12 +0200
committer Martin Fischer <martin@push-f.com> 2023-09-28 11:00:01 +0200
commit b027ecdb397c2e378491f847660f8eeb740e8cf6 (patch)
tree 1f910e8974c1f37706b3ab78d4214977b36fe74a /tests/test_spans.rs
parent 635a571ee76bf7fdaaf01c204f30289489b80c1a (diff)
chore: rename integration tests
Diffstat (limited to 'tests/test_spans.rs')
-rw-r--r-- tests/test_spans.rs 626
1 files changed, 0 insertions, 626 deletions
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
deleted file mode 100644
index b10808c..0000000
--- a/tests/test_spans.rs
+++ /dev/null
@@ -1,626 +0,0 @@
-use std::convert::Infallible;
-use std::ops::Range;
-
-use codespan_reporting::{
- self,
- diagnostic::{Diagnostic, Label},
- files::SimpleFiles,
- term::{self, termcolor::Buffer},
-};
-use html5tokenizer::{
- offset::PosTrackingReader,
- reader::{IntoReader, Reader},
- trace::Trace,
- NaiveParser, Token,
-};
-use insta::assert_snapshot;
-use similar_asserts::assert_eq;
-
-/// Just a convenient type alias for labeler closures since Rust
-/// apparently cannot infer the type (requiring an annotation).
-type Parser = NaiveParser<
- PosTrackingReader<Box<dyn Reader<Error = Infallible>>>,
- usize,
- html5tokenizer::TracingEmitter,
->;
-
-fn parser<R>(reader: impl IntoReader<'static, Reader = R>) -> Parser
-where
- R: Reader<Error = Infallible> + 'static,
-{
- NaiveParser::new_with_emitter(
- PosTrackingReader::new(
- Box::new(reader.into_reader()) as Box<dyn Reader<Error = Infallible>>
- ),
- html5tokenizer::TracingEmitter::default(),
- )
-}
-
-fn test_and_annotate<S: AsRef<str> + Clone>(
- html: &'static str,
- labeler: impl Fn(Parser) -> Vec<(Range<usize>, S)>,
-) -> String {
- let labels = labeler(parser(html));
-
- assert_char_encoding_independence(html, labeler);
-
- annotate(html, labels)
-}
-
-fn annotate(html: &str, labels: Vec<(Range<usize>, impl AsRef<str>)>) -> String {
- let mut files = SimpleFiles::new();
- let file_id = files.add("test.html", html);
-
- let diagnostic = Diagnostic::note().with_labels(
- labels
- .into_iter()
- .map(|(span, text)| Label::primary(file_id, span).with_message(text.as_ref()))
- .collect(),
- );
-
- let mut writer = Buffer::no_color();
- let config = codespan_reporting::term::Config::default();
- term::emit(&mut writer, &config, &files, &diagnostic).unwrap();
- let msg = std::str::from_utf8(writer.as_slice()).unwrap();
-
- // strip the filename and the line numbers since we don't need them
- // (apparently they cannot be disabled in codespan_reporting)
- msg.lines()
- .skip(3)
- .flat_map(|l| l.split_once("│ ").map(|s| format!("{}\n", s.1.trim_end())))
- .collect::<Vec<_>>()
- .join("")
-}
-
-#[test]
-fn char_span() {
- let html = "X &amp; &doesntexist; &#1123; </";
- let labeler = |parser: Parser| {
- let mut labels = Vec::new();
- for token_trace in parser.flatten() {
- if let (Token::Char(c), Trace::Char(span)) = token_trace {
- if c != ' ' {
- labels.push((span, ""));
- }
- }
- }
- labels
- };
- assert_snapshot!(test_and_annotate(html, labeler), @r###"
- X &amp; &doesntexist; &#1123; </
- ^ ^^^^^ ^^^^^^^^^^^^^ ^^^^^^^ ^^
- "###);
-}
-
-#[test]
-fn start_tag_span() {
- let html = "<x> <xyz> <xyz > <xyz/>";
- let labeler = |parser: Parser| {
- let mut labels = Vec::new();
- for (_, trace) in parser.flatten() {
- if let Trace::StartTag(trace) = trace {
- labels.push((trace.span, ""));
- }
- }
- labels
- };
- assert_snapshot!(test_and_annotate(html, labeler), @r###"
- <x> <xyz> <xyz > <xyz/>
- ^^^ ^^^^^ ^^^^^^^ ^^^^^^
- "###);
-}
-
-#[test]
-fn end_tag_span() {
- let html = "</x> </xyz> </xyz > </xyz/>";
- let labeler = |parser: Parser| {
- let mut labels = Vec::new();
- for (_, trace) in parser.flatten() {
- if let Trace::EndTag(trace) = trace {
- labels.push((trace.span, ""));
- }
- }
- labels
- };
- assert_snapshot!(test_and_annotate(html, labeler), @r###"
- </x> </xyz> </xyz > </xyz/>
- ^^^^ ^^^^^^ ^^^^^^^^ ^^^^^^^
- "###);
-}
-
-#[test]
-fn start_tag_name_span() {
- let html = "<x> <xyz> <xyz > <xyz/>";
- let labeler = |parser: Parser| {
- let mut labels = Vec::new();
- for (_, trace) in parser.flatten() {
- if let Trace::StartTag(trace) = trace {
- labels.push((trace.name_span, ""));
- }
- }
- labels
- };
- assert_snapshot!(test_and_annotate(html, labeler), @r###"
- <x> <xyz> <xyz > <xyz/>
- ^ ^^^ ^^^ ^^^
- "###);
-}
-
-#[test]
-fn end_tag_name_span() {
- let html = "</x> </xyz> </xyz > </xyz/>";
- let labeler = |parser: Parser| {
- let mut labels = Vec::new();
- for (_, trace) in parser.flatten() {
- if let Trace::EndTag(trace) = trace {
- labels.push((trace.name_span, ""));
- }
- }
- labels
- };
- assert_snapshot!(test_and_annotate(html, labeler), @r###"
- </x> </xyz> </xyz > </xyz/>
- ^ ^^^ ^^^ ^^^
- "###);
-}
-
-#[test]
-fn attribute_name_span() {
- let html = "<test x xyz y=VAL xy=VAL z = VAL yzx = VAL>";
- let labeler = |parser: Parser| {
- let mut labels = Vec::new();
- let (Token::StartTag(tag), Trace::StartTag(trace)) = parser.flatten().next().unwrap()
- else {
- panic!("expected start tag")
- };
- for attr in &tag.attributes {
- labels.push((
- trace.attribute_traces[attr.trace_idx().unwrap()].name_span(),
- "",
- ));
- }
- labels
- };
- assert_snapshot!(test_and_annotate(html, labeler), @r###"
- <test x xyz y=VAL xy=VAL z = VAL yzx = VAL>
- ^ ^^^ ^ ^^ ^ ^^^
- "###);
-}
-
-#[test]
-fn attribute_value_span() {
- let html = "<test x=unquoted y = unquoted z='single-quoted' zz=\"double-quoted\" empty=''>";
- let labeler = |parser: Parser| {
- let mut labels = Vec::new();
- let (Token::StartTag(tag), Trace::StartTag(trace)) = parser.flatten().next().unwrap()
- else {
- panic!("expected start tag")
- };
- for attr in &tag.attributes {
- labels.push((
- trace.attribute_traces[attr.trace_idx().unwrap()]
- .value_span()
- .unwrap(),
- "",
- ));
- }
- labels
- };
- assert_snapshot!(test_and_annotate(html, labeler), @r###"
- <test x=unquoted y = unquoted z='single-quoted' zz="double-quoted" empty=''>
- ^^^^^^^^ ^^^^^^^^ ^^^^^^^^^^^^^ ^^^^^^^^^^^^^ ^
- "###);
-}
-
-#[test]
-fn attribute_value_with_char_ref() {
- let html = "<test x=&amp; y='&amp;' z=\"&amp;\">";
- let labeler = |parser: Parser| {
- let mut labels = Vec::new();
- let (Token::StartTag(tag), Trace::StartTag(trace)) = parser.flatten().next().unwrap()
- else {
- panic!("expected start tag")
- };
- for attr in &tag.attributes {
- labels.push((
- trace.attribute_traces[attr.trace_idx().unwrap()]
- .value_span()
- .unwrap(),
- "",
- ));
- }
- labels
- };
- assert_snapshot!(test_and_annotate(html, labeler), @r###"
- <test x=&amp; y='&amp;' z="&amp;">
- ^^^^^ ^^^^^ ^^^^^
- "###);
-}
-
-#[test]
-fn comment_data_span() {
- #[rustfmt::skip]
- let cases = [
- "<!-- Why are you looking at the source code? -->",
- "<!-- Why are you looking at the source code? --",
- "<!-- Why are you looking at the source code? -",
- "<!-- Why are you looking at the source code?",
- "<!--",
- "<!-->",
- "<!---",
- "<!--->",
- "<!-- Why are you looking at the source code? ->",
- "<!-- Why are you looking at the source code? --!>",
- "<!-- Why are you looking at the source code? --!",
-
- // bogus comments
- "<! Why are you looking at the source code? -->",
- "<!",
- ];
-
- let mut annotated = String::new();
- for case in cases {
- let labeler = |parser: Parser| {
- let (_, Trace::Comment(comment)) = parser.flatten().next().unwrap() else {
- panic!("expected comment");
- };
- vec![(comment.data_span, "")]
- };
-
- annotated.push_str(&test_and_annotate(case, labeler));
- }
-
- assert_snapshot!(annotated, @r###"
- <!-- Why are you looking at the source code? -->
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- <!-- Why are you looking at the source code? --
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- <!-- Why are you looking at the source code? -
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- <!-- Why are you looking at the source code?
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- <!--
- ^
- <!-->
- ^
- <!---
- ^
- <!--->
- ^
- <!-- Why are you looking at the source code? ->
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- <!-- Why are you looking at the source code? --!>
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- <!-- Why are you looking at the source code? --!
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- <! Why are you looking at the source code? -->
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- <!
- ^
- "###);
-
- for (idx, case) in cases.iter().enumerate() {
- let (Token::Comment(data), Trace::Comment(trace)) = parser(*case).flatten().next().unwrap()
- else {
- panic!("expected comment");
- };
- assert_eq!(case[trace.data_span], data, "case {idx}");
- }
-}
-
-#[test]
-fn doctype_span() {
- #[rustfmt::skip]
- let cases = [
- r#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd" >"#,
- ];
-
- let mut annotated = String::new();
- for case in cases {
- let labeler = |parser: Parser| {
- let (_, Trace::Doctype(trace)) = parser.flatten().next().unwrap() else {
- panic!("expected doctype");
- };
- vec![(trace.span(), "")]
- };
- annotated.push_str(&test_and_annotate(case, labeler));
- }
-
- assert_snapshot!(annotated, @r###"
- <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd" >
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- "###);
-}
-
-#[test]
-fn doctype_id_spans() {
- #[rustfmt::skip]
- let cases = [
- r#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">"#,
- ];
-
- let mut annotated = String::new();
- for case in cases {
- let labeler = |parser: Parser| {
- let (_, Trace::Doctype(trace)) = parser.flatten().next().unwrap() else {
- panic!("expected doctype");
- };
-
- let mut labels = Vec::new();
- if let Some(name_span) = trace.name_span() {
- labels.push((name_span, "name"));
- }
- if let Some(public_id_span) = trace.public_id_span() {
- labels.push((public_id_span, "public id"));
- }
- if let Some(system_id_span) = trace.system_id_span() {
- labels.push((system_id_span, "system id"));
- }
- labels
- };
-
- annotated.push_str(&test_and_annotate(case, labeler));
- }
-
- assert_snapshot!(annotated, @r###"
- <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
- ^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ system id
- │ │
- │ public id
- name
- "###);
-}
-
-#[test]
-fn eof_offset() {
- let html = "Where does it end?";
- let labeler = |parser: Parser| {
- let mut labels = Vec::new();
- for (_, trace) in parser.flatten() {
- if let Trace::EndOfFile(offset) = trace {
- labels.push((offset..offset, "here"));
- }
- }
- labels
- };
- assert_snapshot!(test_and_annotate(html, labeler), @r###"
- Where does it end?
- ^ here
- "###);
-}
-
-fn annotate_errors(html: &'static str) -> String {
- let mut parser = parser(html);
- for _ in parser.by_ref() {}
- let errors: Vec<_> = parser.emitter_mut().drain_errors().collect();
-
- for (_, span) in errors {
- if span.start == span.end {
- if span.start != html.len() {
- panic!("empty error spans are only allowed at the very end of the source (for eof errors)");
- }
- } else {
- assert!(span.start < span.end);
- assert!(span.end <= html.len());
- }
- }
-
- let labeler = |mut parser: Parser| {
- let mut labels = Vec::new();
- for _ in parser.by_ref() {}
- for (error, span) in parser.emitter_mut().drain_errors() {
- labels.push((span, error.code()));
- }
- labels
- };
-
- test_and_annotate(html, labeler)
-}
-
-#[test]
-fn tests_for_errors_are_sorted() {
- let source_of_this_file = std::fs::read_to_string(file!()).unwrap();
- let mut error_tests: Vec<_> = source_of_this_file
- .lines()
- .filter(|l| l.starts_with("fn error_"))
- .collect();
- let error_tests_found_order = error_tests.join("\n");
- error_tests.sort();
- let error_tests_sorted = error_tests.join("\n");
- assert_eq!(error_tests_found_order, error_tests_sorted);
-}
-
-#[test]
-fn error_char_ref_absence_of_digits() {
- let html = "&#qux;";
- assert_snapshot!(annotate_errors(html), @r###"
- &#qux;
- ^^^ absence-of-digits-in-numeric-character-reference
- "###);
-}
-
-#[test]
-fn error_char_ref_control_char() {
- let html = "&#127;";
- assert_snapshot!(annotate_errors(html), @r###"
- &#127;
- ^^^^^^ control-character-reference
- "###);
-}
-
-#[test]
-fn error_char_ref_missing_semicolon() {
- let html = "&not";
- assert_snapshot!(annotate_errors(html), @r###"
- &not
- ^ missing-semicolon-after-character-reference
- "###);
-}
-
-#[test]
-fn error_char_ref_noncharacter() {
- let html = "&#xFDD0;";
- assert_snapshot!(annotate_errors(html), @r###"
- &#xFDD0;
- ^^^^^^^^ noncharacter-character-reference
- "###);
-}
-
-#[test]
-fn error_char_ref_null_char() {
- let html = "&#0;";
- assert_snapshot!(annotate_errors(html), @r###"
- &#0;
- ^^^^ null-character-reference
- "###);
-}
-
-#[test]
-fn error_char_ref_outside_unicode_range() {
- let html = "&#9999999;";
- assert_snapshot!(annotate_errors(html), @r###"
- &#9999999;
- ^^^^^^^^^^ character-reference-outside-unicode-range
- "###);
-}
-
-#[test]
-fn error_char_ref_surrogate() {
- let html = "&#xD800;";
- assert_snapshot!(annotate_errors(html), @r###"
- &#xD800;
- ^^^^^^^^ surrogate-character-reference
- "###);
-}
-
-#[test]
-fn error_char_ref_unknown_named() {
- let html = "The pirate says &arrrrr;";
- assert_snapshot!(annotate_errors(html), @r###"
- The pirate says &arrrrr;
- ^^^^^^^^ unknown-named-character-reference
- "###);
-}
-
-#[test]
-fn error_duplicate_attribute() {
- let html = "Does this open two pages? <a href=foo.html href=bar.html>";
- assert_snapshot!(annotate_errors(html), @r###"
- Does this open two pages? <a href=foo.html href=bar.html>
- ^^^^ duplicate-attribute
- "###);
-}
-
-#[test]
-fn error_end_tag_with_attributes() {
- let html = "</end-tag first second=value>";
- assert_snapshot!(annotate_errors(html), @r###"
- </end-tag first second=value>
- ^^^^^^ end-tag-with-attributes
- "###);
-}
-
-#[test]
-fn error_end_tag_with_trailing_solidus() {
- let html = "Do you start or do you end? </yes/>";
- assert_snapshot!(annotate_errors(html), @r###"
- Do you start or do you end? </yes/>
- ^ end-tag-with-trailing-solidus
- "###);
-}
-
-#[test]
-fn error_eof_before_tag_name() {
- let html = "<";
- assert_snapshot!(annotate_errors(html), @r###"
- <
- ^ eof-before-tag-name
- "###);
-}
-
-// TODO: add error_eof_in_cdata test
-// blocked by lack of proper tree constructor (NaiveParser doesn't parse CDATA sections)
-
-#[test]
-fn error_eof_in_comment() {
- let html = "<!--";
- assert_snapshot!(annotate_errors(html), @r###"
- <!--
- ^ eof-in-comment
- "###);
-}
-
-#[test]
-fn error_eof_in_doctype() {
- let html = "<!doctype html";
- assert_snapshot!(annotate_errors(html), @r###"
- <!doctype html
- ^ eof-in-doctype
- "###);
-}
-
-#[test]
-fn error_eof_in_script_html_comment_like_text() {
- let html = "<script><!--";
- assert_snapshot!(annotate_errors(html), @r###"
- <script><!--
- ^ eof-in-script-html-comment-like-text
- "###);
-}
-
-#[test]
-fn error_eof_in_tag() {
- let html = "</sarcasm";
- assert_snapshot!(annotate_errors(html), @r###"
- </sarcasm
- ^ eof-in-tag
- "###);
-}
-
-#[test]
-fn error_invalid_first_character_of_tag_name() {
- let html = "Please mind the gap: < test";
- assert_snapshot!(annotate_errors(html), @r###"
- Please mind the gap: < test
- ^ invalid-first-character-of-tag-name
- "###);
-}
-
-fn assert_char_encoding_independence<S: AsRef<str> + Clone>(
- html: &'static str,
- labeler: impl Fn(Parser) -> Vec<(Range<usize>, S)>,
-) {
- let utf8_labels = labeler(parser(html));
- let utf16_labels = labeler(parser(Utf16Reader(html.into_reader())));
-
- for (idx, (span, _)) in utf16_labels.into_iter().enumerate() {
- let expected_utf16_span = Range {
- start: html[..utf8_labels[idx].0.start].encode_utf16().count() * 2,
- end: html[..utf8_labels[idx].0.end].encode_utf16().count() * 2,
- };
- assert_eq!(
- span,
- expected_utf16_span,
- "UTF-16 span didn't match the UTF-8 span, which looks like:\n{}",
- annotate(html, vec![utf8_labels[idx].clone()])
- );
- }
-}
-
-struct Utf16Reader<'a>(html5tokenizer::reader::StringReader<'a>);
-
-impl html5tokenizer::reader::Reader for Utf16Reader<'_> {
- type Error = std::convert::Infallible;
-
- fn read_char(&mut self) -> Result<Option<char>, Self::Error> {
- self.0.read_char()
- }
-
- fn try_read_string(&mut self, s: &str, case_sensitive: bool) -> Result<bool, Self::Error> {
- self.0.try_read_string(s, case_sensitive)
- }
-
- fn len_of_char_in_current_encoding(&self, c: char) -> usize {
- c.len_utf16() * 2
- }
-}