From b027ecdb397c2e378491f847660f8eeb740e8cf6 Mon Sep 17 00:00:00 2001
From: Martin Fischer <martin@push-f.com>
Date: Wed, 27 Sep 2023 09:25:12 +0200
Subject: chore: rename integration tests

---
 tests/spans.rs      | 626 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/test_spans.rs | 626 ----------------------------------------------------
 2 files changed, 626 insertions(+), 626 deletions(-)
 create mode 100644 tests/spans.rs
 delete mode 100644 tests/test_spans.rs

(limited to 'tests')
diff --git a/tests/spans.rs b/tests/spans.rs
new file mode 100644
index 0000000..b10808c
--- /dev/null
+++ b/tests/spans.rs
@@ -0,0 +1,626 @@
+use std::convert::Infallible;
+use std::ops::Range;
+
+use codespan_reporting::{
+    self,
+    diagnostic::{Diagnostic, Label},
+    files::SimpleFiles,
+    term::{self, termcolor::Buffer},
+};
+use html5tokenizer::{
+    offset::PosTrackingReader,
+    reader::{IntoReader, Reader},
+    trace::Trace,
+    NaiveParser, Token,
+};
+use insta::assert_snapshot;
+use similar_asserts::assert_eq;
+
+/// Just a convenient type alias for labeler closures since Rust
+/// apparently cannot infer the type (requiring an annotation).
+type Parser = NaiveParser<
+    PosTrackingReader<Box<dyn Reader<Error = Infallible>>>,
+    usize,
+    html5tokenizer::TracingEmitter,
+>;
+
+fn parser<R>(reader: impl IntoReader<'static, Reader = R>) -> Parser
+where
+    R: Reader<Error = Infallible> + 'static,
+{
+    NaiveParser::new_with_emitter(
+        PosTrackingReader::new(
+            Box::new(reader.into_reader()) as Box<dyn Reader<Error = Infallible>>
+        ),
+        html5tokenizer::TracingEmitter::default(),
+    )
+}
+
+fn test_and_annotate<S: AsRef<str> + Clone>(
+    html: &'static str,
+    labeler: impl Fn(Parser) -> Vec<(Range<usize>, S)>,
+) -> String {
+    let labels = labeler(parser(html));
+
+    assert_char_encoding_independence(html, labeler);
+
+    annotate(html, labels)
+}
+
+fn annotate(html: &str, labels: Vec<(Range<usize>, impl AsRef<str>)>) -> String {
+    let mut files = SimpleFiles::new();
+    let file_id = files.add("test.html", html);
+
+    let diagnostic = Diagnostic::note().with_labels(
+        labels
+            .into_iter()
+            .map(|(span, text)| Label::primary(file_id, span).with_message(text.as_ref()))
+            .collect(),
+    );
+
+    let mut writer = Buffer::no_color();
+    let config = codespan_reporting::term::Config::default();
+    term::emit(&mut writer, &config, &files, &diagnostic).unwrap();
+    let msg = std::str::from_utf8(writer.as_slice()).unwrap();
+
+    // strip the filename and the line numbers since we don't need them
+    // (apparently they cannot be disabled in codespan_reporting)
+    msg.lines()
+        .skip(3)
+        .flat_map(|l| l.split_once("│ ").map(|s| format!("{}\n", s.1.trim_end())))
+        .collect::<Vec<_>>()
+        .join("")
+}
+
+#[test]
+fn char_span() {
+    let html = "X &amp; &doesntexist; &#1123; </";
+    let labeler = |parser: Parser| {
+        let mut labels = Vec::new();
+        for token_trace in parser.flatten() {
+            if let (Token::Char(c), Trace::Char(span)) = token_trace {
+                if c != ' ' {
+                    labels.push((span, ""));
+                }
+            }
+        }
+        labels
+    };
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
+    X &amp; &doesntexist; &#1123; </
+    ^ ^^^^^ ^^^^^^^^^^^^^ ^^^^^^^ ^^
+    "###);
+}
+
+#[test]
+fn start_tag_span() {
+    let html = "<x> <xyz> <xyz  > <xyz/>";
+    let labeler = |parser: Parser| {
+        let mut labels = Vec::new();
+        for (_, trace) in parser.flatten() {
+            if let Trace::StartTag(trace) = trace {
+                labels.push((trace.span, ""));
+            }
+        }
+        labels
+    };
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
+    <x> <xyz> <xyz  > <xyz/>
+    ^^^ ^^^^^ ^^^^^^^ ^^^^^^
+    "###);
+}
+
+#[test]
+fn end_tag_span() {
+    let html = "</x> </xyz> </xyz  > </xyz/>";
+    let labeler = |parser: Parser| {
+        let mut labels = Vec::new();
+        for (_, trace) in parser.flatten() {
+            if let Trace::EndTag(trace) = trace {
+                labels.push((trace.span, ""));
+            }
+        }
+        labels
+    };
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
+    </x> </xyz> </xyz  > </xyz/>
+    ^^^^ ^^^^^^ ^^^^^^^^ ^^^^^^^
+    "###);
+}
+
+#[test]
+fn start_tag_name_span() {
+    let html = "<x> <xyz> <xyz  > <xyz/>";
+    let labeler = |parser: Parser| {
+        let mut labels = Vec::new();
+        for (_, trace) in parser.flatten() {
+            if let Trace::StartTag(trace) = trace {
+                labels.push((trace.name_span, ""));
+            }
+        }
+        labels
+    };
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
+    <x> <xyz> <xyz  > <xyz/>
+     ^   ^^^   ^^^     ^^^
+    "###);
+}
+
+#[test]
+fn end_tag_name_span() {
+    let html = "</x> </xyz> </xyz  > </xyz/>";
+    let labeler = |parser: Parser| {
+        let mut labels = Vec::new();
+        for (_, trace) in parser.flatten() {
+            if let Trace::EndTag(trace) = trace {
+                labels.push((trace.name_span, ""));
+            }
+        }
+        labels
+    };
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
+    </x> </xyz> </xyz  > </xyz/>
+      ^    ^^^    ^^^      ^^^
+    "###);
+}
+
+#[test]
+fn attribute_name_span() {
+    let html = "<test x xyz y=VAL xy=VAL z = VAL yzx = VAL>";
+    let labeler = |parser: Parser| {
+        let mut labels = Vec::new();
+        let (Token::StartTag(tag), Trace::StartTag(trace)) = parser.flatten().next().unwrap()
+        else {
+            panic!("expected start tag")
+        };
+        for attr in &tag.attributes {
+            labels.push((
+                trace.attribute_traces[attr.trace_idx().unwrap()].name_span(),
+                "",
+            ));
+        }
+        labels
+    };
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
+    <test x xyz y=VAL xy=VAL z = VAL yzx = VAL>
+          ^ ^^^ ^     ^^     ^       ^^^
+    "###);
+}
+
+#[test]
+fn attribute_value_span() {
+    let html = "<test x=unquoted y = unquoted z='single-quoted' zz=\"double-quoted\" empty=''>";
+    let labeler = |parser: Parser| {
+        let mut labels = Vec::new();
+        let (Token::StartTag(tag), Trace::StartTag(trace)) = parser.flatten().next().unwrap()
+        else {
+            panic!("expected start tag")
+        };
+        for attr in &tag.attributes {
+            labels.push((
+                trace.attribute_traces[attr.trace_idx().unwrap()]
+                    .value_span()
+                    .unwrap(),
+                "",
+            ));
+        }
+        labels
+    };
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
+    <test x=unquoted y = unquoted z='single-quoted' zz="double-quoted" empty=''>
+            ^^^^^^^^     ^^^^^^^^    ^^^^^^^^^^^^^      ^^^^^^^^^^^^^         ^
+    "###);
+}
+
+#[test]
+fn attribute_value_with_char_ref() {
+    let html = "<test x=&amp; y='&amp;' z=\"&amp;\">";
+    let labeler = |parser: Parser| {
+        let mut labels = Vec::new();
+        let (Token::StartTag(tag), Trace::StartTag(trace)) = parser.flatten().next().unwrap()
+        else {
+            panic!("expected start tag")
+        };
+        for attr in &tag.attributes {
+            labels.push((
+                trace.attribute_traces[attr.trace_idx().unwrap()]
+                    .value_span()
+                    .unwrap(),
+                "",
+            ));
+        }
+        labels
+    };
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
+    <test x=&amp; y='&amp;' z="&amp;">
+            ^^^^^    ^^^^^     ^^^^^
+    "###);
+}
+
+#[test]
+fn comment_data_span() {
+    #[rustfmt::skip]
+    let cases = [
+        "<!-- Why are you looking at the source code? -->",
+        "<!-- Why are you looking at the source code? --",
+        "<!-- Why are you looking at the source code? -",
+        "<!-- Why are you looking at the source code?",
+        "<!--",
+        "<!-->",
+        "<!---",
+        "<!--->",
+        "<!-- Why are you looking at the source code? ->",
+        "<!-- Why are you looking at the source code? --!>",
+        "<!-- Why are you looking at the source code? --!",
+
+        // bogus comments
+        "<! Why are you looking at the source code? -->",
+        "<!",
+    ];
+
+    let mut annotated = String::new();
+    for case in cases {
+        let labeler = |parser: Parser| {
+            let (_, Trace::Comment(comment)) = parser.flatten().next().unwrap() else {
+                panic!("expected comment");
+            };
+            vec![(comment.data_span, "")]
+        };
+
+        annotated.push_str(&test_and_annotate(case, labeler));
+    }
+
+    assert_snapshot!(annotated, @r###"
+    <!-- Why are you looking at the source code? -->
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    <!-- Why are you looking at the source code? --
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    <!-- Why are you looking at the source code? -
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    <!-- Why are you looking at the source code?
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    <!--
+        ^
+    <!-->
+        ^
+    <!---
+        ^
+    <!--->
+        ^
+    <!-- Why are you looking at the source code? ->
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    <!-- Why are you looking at the source code? --!>
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    <!-- Why are you looking at the source code? --!
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    <! Why are you looking at the source code? -->
+      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    <!
+      ^
+    "###);
+
+    for (idx, case) in cases.iter().enumerate() {
+        let (Token::Comment(data), Trace::Comment(trace)) = parser(*case).flatten().next().unwrap()
+        else {
+            panic!("expected comment");
+        };
+        assert_eq!(case[trace.data_span], data, "case {idx}");
+    }
+}
+
+#[test]
+fn doctype_span() {
+    #[rustfmt::skip]
+    let cases = [
+        r#"<!DOCTYPE       HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"     >"#,
+    ];
+
+    let mut annotated = String::new();
+    for case in cases {
+        let labeler = |parser: Parser| {
+            let (_, Trace::Doctype(trace)) = parser.flatten().next().unwrap() else {
+                panic!("expected doctype");
+            };
+            vec![(trace.span(), "")]
+        };
+        annotated.push_str(&test_and_annotate(case, labeler));
+    }
+
+    assert_snapshot!(annotated, @r###"
+    <!DOCTYPE       HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"     >
+    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    "###);
+}
+
+#[test]
+fn doctype_id_spans() {
+    #[rustfmt::skip]
+    let cases = [
+        r#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">"#,
+    ];
+
+    let mut annotated = String::new();
+    for case in cases {
+        let labeler = |parser: Parser| {
+            let (_, Trace::Doctype(trace)) = parser.flatten().next().unwrap() else {
+                panic!("expected doctype");
+            };
+
+            let mut labels = Vec::new();
+            if let Some(name_span) = trace.name_span() {
+                labels.push((name_span, "name"));
+            }
+            if let Some(public_id_span) = trace.public_id_span() {
+                labels.push((public_id_span, "public id"));
+            }
+            if let Some(system_id_span) = trace.system_id_span() {
+                labels.push((system_id_span, "system id"));
+            }
+            labels
+        };
+
+        annotated.push_str(&test_and_annotate(case, labeler));
+    }
+
+    assert_snapshot!(annotated, @r###"
+    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+              ^^^^         ^^^^^^^^^^^^^^^^^^^^^^^^^   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ system id
+              │            │
+              │            public id
+              name
+    "###);
+}
+
+#[test]
+fn eof_offset() {
+    let html = "Where does it end?";
+    let labeler = |parser: Parser| {
+        let mut labels = Vec::new();
+        for (_, trace) in parser.flatten() {
+            if let Trace::EndOfFile(offset) = trace {
+                labels.push((offset..offset, "here"));
+            }
+        }
+        labels
+    };
+    assert_snapshot!(test_and_annotate(html, labeler), @r###"
+    Where does it end?
+                      ^ here
+    "###);
+}
+
+fn annotate_errors(html: &'static str) -> String {
+    let mut parser = parser(html);
+    for _ in parser.by_ref() {}
+    let errors: Vec<_> = parser.emitter_mut().drain_errors().collect();
+
+    for (_, span) in errors {
+        if span.start == span.end {
+            if span.start != html.len() {
+                panic!("empty error spans are only allowed at the very end of the source (for eof errors)");
+            }
+        } else {
+            assert!(span.start < span.end);
+            assert!(span.end <= html.len());
+        }
+    }
+
+    let labeler = |mut parser: Parser| {
+        let mut labels = Vec::new();
+        for _ in parser.by_ref() {}
+        for (error, span) in parser.emitter_mut().drain_errors() {
+            labels.push((span, error.code()));
+        }
+        labels
+    };
+
+    test_and_annotate(html, labeler)
+}
+
+#[test]
+fn tests_for_errors_are_sorted() {
+    let source_of_this_file = std::fs::read_to_string(file!()).unwrap();
+    let mut error_tests: Vec<_> = source_of_this_file
+        .lines()
+        .filter(|l| l.starts_with("fn error_"))
+        .collect();
+    let error_tests_found_order = error_tests.join("\n");
+    error_tests.sort();
+    let error_tests_sorted = error_tests.join("\n");
+    assert_eq!(error_tests_found_order, error_tests_sorted);
+}
+
+#[test]
+fn error_char_ref_absence_of_digits() {
+    let html = "&#qux;";
+    assert_snapshot!(annotate_errors(html), @r###"
+    &#qux;
+    ^^^ absence-of-digits-in-numeric-character-reference
+    "###);
+}
+
+#[test]
+fn error_char_ref_control_char() {
+    let html = "&#127;";
+    assert_snapshot!(annotate_errors(html), @r###"
+    &#127;
+    ^^^^^^ control-character-reference
+    "###);
+}
+
+#[test]
+fn error_char_ref_missing_semicolon() {
+    let html = "&not";
+    assert_snapshot!(annotate_errors(html), @r###"
+    &not
+        ^ missing-semicolon-after-character-reference
+    "###);
+}
+
+#[test]
+fn error_char_ref_noncharacter() {
+    let html = "&#xFDD0;";
+    assert_snapshot!(annotate_errors(html), @r###"
+    &#xFDD0;
+    ^^^^^^^^ noncharacter-character-reference
+    "###);
+}
+
+#[test]
+fn error_char_ref_null_char() {
+    let html = "&#0;";
+    assert_snapshot!(annotate_errors(html), @r###"
+    &#0;
+    ^^^^ null-character-reference
+    "###);
+}
+
+#[test]
+fn error_char_ref_outside_unicode_range() {
+    let html = "&#9999999;";
+    assert_snapshot!(annotate_errors(html), @r###"
+    &#9999999;
+    ^^^^^^^^^^ character-reference-outside-unicode-range
+    "###);
+}
+
+#[test]
+fn error_char_ref_surrogate() {
+    let html = "&#xD800;";
+    assert_snapshot!(annotate_errors(html), @r###"
+    &#xD800;
+    ^^^^^^^^ surrogate-character-reference
+    "###);
+}
+
+#[test]
+fn error_char_ref_unknown_named() {
+    let html = "The pirate says &arrrrr;";
+    assert_snapshot!(annotate_errors(html), @r###"
+    The pirate says &arrrrr;
+                    ^^^^^^^^ unknown-named-character-reference
+    "###);
+}
+
+#[test]
+fn error_duplicate_attribute() {
+    let html = "Does this open two pages? <a href=foo.html href=bar.html>";
+    assert_snapshot!(annotate_errors(html), @r###"
+    Does this open two pages? <a href=foo.html href=bar.html>
+                                               ^^^^ duplicate-attribute
+    "###);
+}
+
+#[test]
+fn error_end_tag_with_attributes() {
+    let html = "</end-tag first second=value>";
+    assert_snapshot!(annotate_errors(html), @r###"
+    </end-tag first second=value>
+                    ^^^^^^ end-tag-with-attributes
+    "###);
+}
+
+#[test]
+fn error_end_tag_with_trailing_solidus() {
+    let html = "Do you start or do you end? </yes/>";
+    assert_snapshot!(annotate_errors(html), @r###"
+    Do you start or do you end? </yes/>
+                                     ^ end-tag-with-trailing-solidus
+    "###);
+}
+
+#[test]
+fn error_eof_before_tag_name() {
+    let html = "<";
+    assert_snapshot!(annotate_errors(html), @r###"
+    <
+     ^ eof-before-tag-name
+    "###);
+}
+
+// TODO: add error_eof_in_cdata test
+// blocked by lack of proper tree constructor (NaiveParser doesn't parse CDATA sections)
+
+#[test]
+fn error_eof_in_comment() {
+    let html = "<!--";
+    assert_snapshot!(annotate_errors(html), @r###"
+    <!--
+        ^ eof-in-comment
+    "###);
+}
+
+#[test]
+fn error_eof_in_doctype() {
+    let html = "<!doctype html";
+    assert_snapshot!(annotate_errors(html), @r###"
+    <!doctype html
+                  ^ eof-in-doctype
+    "###);
+}
+
+#[test]
+fn error_eof_in_script_html_comment_like_text() {
+    let html = "<script><!--";
+    assert_snapshot!(annotate_errors(html), @r###"
+    <script><!--
+                ^ eof-in-script-html-comment-like-text
+    "###);
+}
+
+#[test]
+fn error_eof_in_tag() {
+    let html = "</sarcasm";
+    assert_snapshot!(annotate_errors(html), @r###"
+    </sarcasm
+             ^ eof-in-tag
+    "###);
+}
+
+#[test]
+fn error_invalid_first_character_of_tag_name() {
+    let html = "Please mind the gap: < test";
+    assert_snapshot!(annotate_errors(html), @r###"
+    Please mind the gap: < test
+                          ^ invalid-first-character-of-tag-name
+    "###);
+}
+
+fn assert_char_encoding_independence<S: AsRef<str> + Clone>(
+    html: &'static str,
+    labeler: impl Fn(Parser) -> Vec<(Range<usize>, S)>,
+) {
+    let utf8_labels = labeler(parser(html));
+    let utf16_labels = labeler(parser(Utf16Reader(html.into_reader())));
+
+    for (idx, (span, _)) in utf16_labels.into_iter().enumerate() {
+        let expected_utf16_span = Range {
+            start: html[..utf8_labels[idx].0.start].encode_utf16().count() * 2,
+            end: html[..utf8_labels[idx].0.end].encode_utf16().count() * 2,
+        };
+        assert_eq!(
+            span,
+            expected_utf16_span,
+            "UTF-16 span didn't match the UTF-8 span, which looks like:\n{}",
+            annotate(html, vec![utf8_labels[idx].clone()])
+        );
+    }
+}
+
+struct Utf16Reader<'a>(html5tokenizer::reader::StringReader<'a>);
+
+impl html5tokenizer::reader::Reader for Utf16Reader<'_> {
+    type Error = std::convert::Infallible;
+
+    fn read_char(&mut self) -> Result<Option<char>, Self::Error> {
+        self.0.read_char()
+    }
+
+    fn try_read_string(&mut self, s: &str, case_sensitive: bool) -> Result<bool, Self::Error> {
+        self.0.try_read_string(s, case_sensitive)
+    }
+
+    fn len_of_char_in_current_encoding(&self, c: char) -> usize {
+        c.len_utf16() * 2
+    }
+}
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
deleted file mode 100644
index b10808c..0000000
--- a/tests/test_spans.rs
+++ /dev/null
@@ -1,626 +0,0 @@
-use std::convert::Infallible;
-use std::ops::Range;
-
-use codespan_reporting::{
-    self,
-    diagnostic::{Diagnostic, Label},
-    files::SimpleFiles,
-    term::{self, termcolor::Buffer},
-};
-use html5tokenizer::{
-    offset::PosTrackingReader,
-    reader::{IntoReader, Reader},
-    trace::Trace,
-    NaiveParser, Token,
-};
-use insta::assert_snapshot;
-use similar_asserts::assert_eq;
-
-/// Just a convenient type alias for labeler closures since Rust
-/// apparently cannot infer the type (requiring an annotation).
-type Parser = NaiveParser<
-    PosTrackingReader<Box<dyn Reader<Error = Infallible>>>,
-    usize,
-    html5tokenizer::TracingEmitter,
->;
-
-fn parser<R>(reader: impl IntoReader<'static, Reader = R>) -> Parser
-where
-    R: Reader<Error = Infallible> + 'static,
-{
-    NaiveParser::new_with_emitter(
-        PosTrackingReader::new(
-            Box::new(reader.into_reader()) as Box<dyn Reader<Error = Infallible>>
-        ),
-        html5tokenizer::TracingEmitter::default(),
-    )
-}
-
-fn test_and_annotate<S: AsRef<str> + Clone>(
-    html: &'static str,
-    labeler: impl Fn(Parser) -> Vec<(Range<usize>, S)>,
-) -> String {
-    let labels = labeler(parser(html));
-
-    assert_char_encoding_independence(html, labeler);
-
-    annotate(html, labels)
-}
-
-fn annotate(html: &str, labels: Vec<(Range<usize>, impl AsRef<str>)>) -> String {
-    let mut files = SimpleFiles::new();
-    let file_id = files.add("test.html", html);
-
-    let diagnostic = Diagnostic::note().with_labels(
-        labels
-            .into_iter()
-            .map(|(span, text)| Label::primary(file_id, span).with_message(text.as_ref()))
-            .collect(),
-    );
-
-    let mut writer = Buffer::no_color();
-    let config = codespan_reporting::term::Config::default();
-    term::emit(&mut writer, &config, &files, &diagnostic).unwrap();
-    let msg = std::str::from_utf8(writer.as_slice()).unwrap();
-
-    // strip the filename and the line numbers since we don't need them
-    // (apparently they cannot be disabled in codespan_reporting)
-    msg.lines()
-        .skip(3)
-        .flat_map(|l| l.split_once("│ ").map(|s| format!("{}\n", s.1.trim_end())))
-        .collect::<Vec<_>>()
-        .join("")
-}
-
-#[test]
-fn char_span() {
-    let html = "X &amp; &doesntexist; &#1123; </";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        for token_trace in parser.flatten() {
-            if let (Token::Char(c), Trace::Char(span)) = token_trace {
-                if c != ' ' {
-                    labels.push((span, ""));
-                }
-            }
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    X &amp; &doesntexist; &#1123; </
-    ^ ^^^^^ ^^^^^^^^^^^^^ ^^^^^^^ ^^
-    "###);
-}
-
-#[test]
-fn start_tag_span() {
-    let html = "<x> <xyz> <xyz  > <xyz/>";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        for (_, trace) in parser.flatten() {
-            if let Trace::StartTag(trace) = trace {
-                labels.push((trace.span, ""));
-            }
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    <x> <xyz> <xyz  > <xyz/>
-    ^^^ ^^^^^ ^^^^^^^ ^^^^^^
-    "###);
-}
-
-#[test]
-fn end_tag_span() {
-    let html = "</x> </xyz> </xyz  > </xyz/>";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        for (_, trace) in parser.flatten() {
-            if let Trace::EndTag(trace) = trace {
-                labels.push((trace.span, ""));
-            }
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    </x> </xyz> </xyz  > </xyz/>
-    ^^^^ ^^^^^^ ^^^^^^^^ ^^^^^^^
-    "###);
-}
-
-#[test]
-fn start_tag_name_span() {
-    let html = "<x> <xyz> <xyz  > <xyz/>";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        for (_, trace) in parser.flatten() {
-            if let Trace::StartTag(trace) = trace {
-                labels.push((trace.name_span, ""));
-            }
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    <x> <xyz> <xyz  > <xyz/>
-     ^   ^^^   ^^^     ^^^
-    "###);
-}
-
-#[test]
-fn end_tag_name_span() {
-    let html = "</x> </xyz> </xyz  > </xyz/>";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        for (_, trace) in parser.flatten() {
-            if let Trace::EndTag(trace) = trace {
-                labels.push((trace.name_span, ""));
-            }
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    </x> </xyz> </xyz  > </xyz/>
-      ^    ^^^    ^^^      ^^^
-    "###);
-}
-
-#[test]
-fn attribute_name_span() {
-    let html = "<test x xyz y=VAL xy=VAL z = VAL yzx = VAL>";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        let (Token::StartTag(tag), Trace::StartTag(trace)) = parser.flatten().next().unwrap()
-        else {
-            panic!("expected start tag")
-        };
-        for attr in &tag.attributes {
-            labels.push((
-                trace.attribute_traces[attr.trace_idx().unwrap()].name_span(),
-                "",
-            ));
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    <test x xyz y=VAL xy=VAL z = VAL yzx = VAL>
-          ^ ^^^ ^     ^^     ^       ^^^
-    "###);
-}
-
-#[test]
-fn attribute_value_span() {
-    let html = "<test x=unquoted y = unquoted z='single-quoted' zz=\"double-quoted\" empty=''>";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        let (Token::StartTag(tag), Trace::StartTag(trace)) = parser.flatten().next().unwrap()
-        else {
-            panic!("expected start tag")
-        };
-        for attr in &tag.attributes {
-            labels.push((
-                trace.attribute_traces[attr.trace_idx().unwrap()]
-                    .value_span()
-                    .unwrap(),
-                "",
-            ));
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    <test x=unquoted y = unquoted z='single-quoted' zz="double-quoted" empty=''>
-            ^^^^^^^^     ^^^^^^^^    ^^^^^^^^^^^^^      ^^^^^^^^^^^^^         ^
-    "###);
-}
-
-#[test]
-fn attribute_value_with_char_ref() {
-    let html = "<test x=&amp; y='&amp;' z=\"&amp;\">";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        let (Token::StartTag(tag), Trace::StartTag(trace)) = parser.flatten().next().unwrap()
-        else {
-            panic!("expected start tag")
-        };
-        for attr in &tag.attributes {
-            labels.push((
-                trace.attribute_traces[attr.trace_idx().unwrap()]
-                    .value_span()
-                    .unwrap(),
-                "",
-            ));
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    <test x=&amp; y='&amp;' z="&amp;">
-            ^^^^^    ^^^^^     ^^^^^
-    "###);
-}
-
-#[test]
-fn comment_data_span() {
-    #[rustfmt::skip]
-    let cases = [
-        "<!-- Why are you looking at the source code? -->",
-        "<!-- Why are you looking at the source code? --",
-        "<!-- Why are you looking at the source code? -",
-        "<!-- Why are you looking at the source code?",
-        "<!--",
-        "<!-->",
-        "<!---",
-        "<!--->",
-        "<!-- Why are you looking at the source code? ->",
-        "<!-- Why are you looking at the source code? --!>",
-        "<!-- Why are you looking at the source code? --!",
-
-        // bogus comments
-        "<! Why are you looking at the source code? -->",
-        "<!",
-    ];
-
-    let mut annotated = String::new();
-    for case in cases {
-        let labeler = |parser: Parser| {
-            let (_, Trace::Comment(comment)) = parser.flatten().next().unwrap() else {
-                panic!("expected comment");
-            };
-            vec![(comment.data_span, "")]
-        };
-
-        annotated.push_str(&test_and_annotate(case, labeler));
-    }
-
-    assert_snapshot!(annotated, @r###"
-    <!-- Why are you looking at the source code? -->
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <!-- Why are you looking at the source code? --
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <!-- Why are you looking at the source code? -
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <!-- Why are you looking at the source code?
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <!--
-        ^
-    <!-->
-        ^
-    <!---
-        ^
-    <!--->
-        ^
-    <!-- Why are you looking at the source code? ->
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <!-- Why are you looking at the source code? --!>
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <!-- Why are you looking at the source code? --!
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <! Why are you looking at the source code? -->
-      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <!
-      ^
-    "###);
-
-    for (idx, case) in cases.iter().enumerate() {
-        let (Token::Comment(data), Trace::Comment(trace)) = parser(*case).flatten().next().unwrap()
-        else {
-            panic!("expected comment");
-        };
-        assert_eq!(case[trace.data_span], data, "case {idx}");
-    }
-}
-
-#[test]
-fn doctype_span() {
-    #[rustfmt::skip]
-    let cases = [
-        r#"<!DOCTYPE       HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"     >"#,
-    ];
-
-    let mut annotated = String::new();
-    for case in cases {
-        let labeler = |parser: Parser| {
-            let (_, Trace::Doctype(trace)) = parser.flatten().next().unwrap() else {
-                panic!("expected doctype");
-            };
-            vec![(trace.span(), "")]
-        };
-        annotated.push_str(&test_and_annotate(case, labeler));
-    }
-
-    assert_snapshot!(annotated, @r###"
-    <!DOCTYPE       HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"     >
-    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    "###);
-}
-
-#[test]
-fn doctype_id_spans() {
-    #[rustfmt::skip]
-    let cases = [
-        r#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">"#,
-    ];
-
-    let mut annotated = String::new();
-    for case in cases {
-        let labeler = |parser: Parser| {
-            let (_, Trace::Doctype(trace)) = parser.flatten().next().unwrap() else {
-                panic!("expected doctype");
-            };
-
-            let mut labels = Vec::new();
-            if let Some(name_span) = trace.name_span() {
-                labels.push((name_span, "name"));
-            }
-            if let Some(public_id_span) = trace.public_id_span() {
-                labels.push((public_id_span, "public id"));
-            }
-            if let Some(system_id_span) = trace.system_id_span() {
-                labels.push((system_id_span, "system id"));
-            }
-            labels
-        };
-
-        annotated.push_str(&test_and_annotate(case, labeler));
-    }
-
-    assert_snapshot!(annotated, @r###"
-    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-              ^^^^         ^^^^^^^^^^^^^^^^^^^^^^^^^   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ system id
-              │            │
-              │            public id
-              name
-    "###);
-}
-
-#[test]
-fn eof_offset() {
-    let html = "Where does it end?";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        for (_, trace) in parser.flatten() {
-            if let Trace::EndOfFile(offset) = trace {
-                labels.push((offset..offset, "here"));
-            }
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    Where does it end?
-                      ^ here
-    "###);
-}
-
-/// Tokenizes `html` and returns the source annotated with the code of every
-/// error the emitter recorded.
-///
-/// Before annotating, every error span is sanity-checked: a non-empty span
-/// must be well-formed and end within the source, and an empty span is only
-/// accepted at the very end of the source.
-fn annotate_errors(html: &'static str) -> String {
-    let mut tokenizer = parser(html);
-    // Run the tokenizer to completion; only the recorded errors matter here.
-    tokenizer.by_ref().for_each(drop);
-
-    for (_, span) in tokenizer.emitter_mut().drain_errors() {
-        if span.start != span.end {
-            assert!(span.start < span.end);
-            assert!(span.end <= html.len());
-        } else if span.start != html.len() {
-            panic!("empty error spans are only allowed at the very end of the source (for eof errors)");
-        }
-    }
-
-    let labeler = |mut tokenizer: Parser| {
-        tokenizer.by_ref().for_each(drop);
-        tokenizer
-            .emitter_mut()
-            .drain_errors()
-            .map(|(error, span)| (span, error.code()))
-            .collect::<Vec<_>>()
-    };
-
-    test_and_annotate(html, labeler)
-}
-
-/// Guards the ordering convention of this file: the `fn error_*` tests must
-/// appear in sorted order.
-#[test]
-fn tests_for_errors_are_sorted() {
-    let source_of_this_file = std::fs::read_to_string(file!()).unwrap();
-
-    // Only lines that start at column 0 with `fn error_` count as error tests.
-    let found: Vec<&str> = source_of_this_file
-        .lines()
-        .filter(|line| line.starts_with("fn error_"))
-        .collect();
-
-    let mut sorted = found.clone();
-    sorted.sort();
-
-    // Compare the joined text so that a failure prints one name per line.
-    assert_eq!(found.join("\n"), sorted.join("\n"));
-}
-
-#[test]
-fn error_char_ref_absence_of_digits() {
-    let html = "&#qux;";
-    assert_snapshot!(annotate_errors(html), @r###"
-    &#qux;
-    ^^^ absence-of-digits-in-numeric-character-reference
-    "###);
-}
-
-/// `&#127;` is reported as a control character reference; the span covers
-/// the whole reference.
-#[test]
-fn error_char_ref_control_char() {
-    let annotated = annotate_errors("&#127;");
-    assert_snapshot!(annotated, @r###"
-    &#127;
-    ^^^^^^ control-character-reference
-    "###);
-}
-
-#[test]
-fn error_char_ref_missing_semicolon() {
-    let html = "&not";
-    assert_snapshot!(annotate_errors(html), @r###"
-    &not
-        ^ missing-semicolon-after-character-reference
-    "###);
-}
-
-/// `&#xFDD0;` is reported as a noncharacter reference; the span covers the
-/// whole reference.
-#[test]
-fn error_char_ref_noncharacter() {
-    let annotated = annotate_errors("&#xFDD0;");
-    assert_snapshot!(annotated, @r###"
-    &#xFDD0;
-    ^^^^^^^^ noncharacter-character-reference
-    "###);
-}
-
-/// `&#0;` is reported as a null character reference; the span covers the
-/// whole reference.
-#[test]
-fn error_char_ref_null_char() {
-    let annotated = annotate_errors("&#0;");
-    assert_snapshot!(annotated, @r###"
-    &#0;
-    ^^^^ null-character-reference
-    "###);
-}
-
-/// `&#9999999;` is reported as lying outside the Unicode range; the span
-/// covers the whole reference.
-#[test]
-fn error_char_ref_outside_unicode_range() {
-    let annotated = annotate_errors("&#9999999;");
-    assert_snapshot!(annotated, @r###"
-    &#9999999;
-    ^^^^^^^^^^ character-reference-outside-unicode-range
-    "###);
-}
-
-/// `&#xD800;` is reported as a surrogate character reference; the span
-/// covers the whole reference.
-#[test]
-fn error_char_ref_surrogate() {
-    let annotated = annotate_errors("&#xD800;");
-    assert_snapshot!(annotated, @r###"
-    &#xD800;
-    ^^^^^^^^ surrogate-character-reference
-    "###);
-}
-
-/// `&arrrrr;` is not a known named reference; the span covers the whole
-/// reference including the semicolon.
-#[test]
-fn error_char_ref_unknown_named() {
-    let annotated = annotate_errors("The pirate says &arrrrr;");
-    assert_snapshot!(annotated, @r###"
-    The pirate says &arrrrr;
-                    ^^^^^^^^ unknown-named-character-reference
-    "###);
-}
-
-/// A repeated `href` attribute is reported on the name of the second
-/// occurrence.
-#[test]
-fn error_duplicate_attribute() {
-    let annotated = annotate_errors("Does this open two pages? <a href=foo.html href=bar.html>");
-    assert_snapshot!(annotated, @r###"
-    Does this open two pages? <a href=foo.html href=bar.html>
-                                               ^^^^ duplicate-attribute
-    "###);
-}
-
-/// An end tag carrying attributes is reported once, on the name of the
-/// `second` attribute in this input.
-#[test]
-fn error_end_tag_with_attributes() {
-    let annotated = annotate_errors("</end-tag first second=value>");
-    assert_snapshot!(annotated, @r###"
-    </end-tag first second=value>
-                    ^^^^^^ end-tag-with-attributes
-    "###);
-}
-
-/// An end tag with a trailing `/` is reported on the solidus itself.
-#[test]
-fn error_end_tag_with_trailing_solidus() {
-    let annotated = annotate_errors("Do you start or do you end? </yes/>");
-    assert_snapshot!(annotated, @r###"
-    Do you start or do you end? </yes/>
-                                     ^ end-tag-with-trailing-solidus
-    "###);
-}
-
-/// Input ending right after `<` is reported at the end of the source.
-#[test]
-fn error_eof_before_tag_name() {
-    let annotated = annotate_errors("<");
-    assert_snapshot!(annotated, @r###"
-    <
-     ^ eof-before-tag-name
-    "###);
-}
-
-// TODO: add error_eof_in_cdata test
-// blocked by lack of proper tree constructor (NaiveParser doesn't parse CDATA sections)
-
-/// Input ending inside a comment is reported at the end of the source.
-#[test]
-fn error_eof_in_comment() {
-    let annotated = annotate_errors("<!--");
-    assert_snapshot!(annotated, @r###"
-    <!--
-        ^ eof-in-comment
-    "###);
-}
-
-/// Input ending inside a doctype is reported at the end of the source.
-#[test]
-fn error_eof_in_doctype() {
-    let annotated = annotate_errors("<!doctype html");
-    assert_snapshot!(annotated, @r###"
-    <!doctype html
-                  ^ eof-in-doctype
-    "###);
-}
-
-/// Input ending inside comment-like text within `<script>` is reported at
-/// the end of the source.
-#[test]
-fn error_eof_in_script_html_comment_like_text() {
-    let annotated = annotate_errors("<script><!--");
-    assert_snapshot!(annotated, @r###"
-    <script><!--
-                ^ eof-in-script-html-comment-like-text
-    "###);
-}
-
-/// Input ending inside a tag is reported at the end of the source.
-#[test]
-fn error_eof_in_tag() {
-    let annotated = annotate_errors("</sarcasm");
-    assert_snapshot!(annotated, @r###"
-    </sarcasm
-             ^ eof-in-tag
-    "###);
-}
-
-/// A space directly after `<` is reported as an invalid first character of
-/// a tag name; the span covers that one character.
-#[test]
-fn error_invalid_first_character_of_tag_name() {
-    let annotated = annotate_errors("Please mind the gap: < test");
-    assert_snapshot!(annotated, @r###"
-    Please mind the gap: < test
-                          ^ invalid-first-character-of-tag-name
-    "###);
-}
-
-/// Asserts that the spans produced by `labeler` do not depend on the
-/// character encoding the reader reports.
-///
-/// The labeler is run twice over `html`: once with the plain reader and once
-/// through [`Utf16Reader`], which reports every character as
-/// `len_utf16() * 2` bytes long. Each span of the second run must equal the
-/// span of the first run converted to UTF-16 byte offsets.
-///
-/// # Panics
-///
-/// Panics if the two runs yield a different number of labels, or if any
-/// UTF-16 span differs from its converted UTF-8 counterpart.
-fn assert_char_encoding_independence<S: AsRef<str> + Clone>(
-    html: &'static str,
-    labeler: impl Fn(Parser) -> Vec<(Range<usize>, S)>,
-) {
-    let utf8_labels = labeler(parser(html));
-    let utf16_labels = labeler(parser(Utf16Reader(html.into_reader())));
-
-    // Without this check a UTF-16 run yielding fewer labels would pass
-    // silently, and one yielding more would die on an index-out-of-bounds
-    // panic instead of a readable assertion failure.
-    assert_eq!(
-        utf16_labels.len(),
-        utf8_labels.len(),
-        "the UTF-16 run produced a different number of labels than the UTF-8 run"
-    );
-
-    for (utf8_label, (utf16_span, _)) in utf8_labels.iter().zip(utf16_labels) {
-        let utf8_span = &utf8_label.0;
-        // Convert the UTF-8 byte offsets to UTF-16 byte offsets: count the
-        // UTF-16 code units of the prefix, two bytes each.
-        let expected_utf16_span = Range {
-            start: html[..utf8_span.start].encode_utf16().count() * 2,
-            end: html[..utf8_span.end].encode_utf16().count() * 2,
-        };
-        assert_eq!(
-            utf16_span,
-            expected_utf16_span,
-            "UTF-16 span didn't match the UTF-8 span, which looks like:\n{}",
-            annotate(html, vec![utf8_label.clone()])
-        );
-    }
-}
-
-/// Reader that delegates all reading to a `StringReader` but reports
-/// character lengths as UTF-16 byte lengths.
-struct Utf16Reader<'a>(html5tokenizer::reader::StringReader<'a>);
-
-impl<'a> Reader for Utf16Reader<'a> {
-    type Error = Infallible;
-
-    /// Forwards to the wrapped reader.
-    fn read_char(&mut self) -> Result<Option<char>, Self::Error> {
-        let Self(inner) = self;
-        inner.read_char()
-    }
-
-    /// Forwards to the wrapped reader.
-    fn try_read_string(&mut self, s: &str, case_sensitive: bool) -> Result<bool, Self::Error> {
-        let Self(inner) = self;
-        inner.try_read_string(s, case_sensitive)
-    }
-
-    /// Returns the length of `c` in bytes when encoded as UTF-16.
-    fn len_of_char_in_current_encoding(&self, c: char) -> usize {
-        // `len_utf16` counts 16-bit code units; each one is two bytes wide.
-        2 * c.len_utf16()
-    }
-}
-- 
cgit v1.2.3