1 files changed, 0 insertions, 626 deletions
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
deleted file mode 100644
index b10808c..0000000
--- a/tests/test_spans.rs
+++ /dev/null
@@ -1,626 +0,0 @@
-use std::convert::Infallible;
-use std::ops::Range;
-
-use codespan_reporting::{
-    self,
-    diagnostic::{Diagnostic, Label},
-    files::SimpleFiles,
-    term::{self, termcolor::Buffer},
-};
-use html5tokenizer::{
-    offset::PosTrackingReader,
-    reader::{IntoReader, Reader},
-    trace::Trace,
-    NaiveParser, Token,
-};
-use insta::assert_snapshot;
-use similar_asserts::assert_eq;
-
-/// Just a convenient type alias for labeler closures since Rust
-/// apparently cannot infer the type (requiring an annotation).
-type Parser = NaiveParser<
-    PosTrackingReader<Box<dyn Reader<Error = Infallible>>>,
-    usize,
-    html5tokenizer::TracingEmitter,
->;
-
-fn parser<R>(reader: impl IntoReader<'static, Reader = R>) -> Parser
-where
-    R: Reader<Error = Infallible> + 'static,
-{
-    NaiveParser::new_with_emitter(
-        PosTrackingReader::new(
-            Box::new(reader.into_reader()) as Box<dyn Reader<Error = Infallible>>
-        ),
-        html5tokenizer::TracingEmitter::default(),
-    )
-}
-
-fn test_and_annotate<S: AsRef<str> + Clone>(
-    html: &'static str,
-    labeler: impl Fn(Parser) -> Vec<(Range<usize>, S)>,
-) -> String {
-    let labels = labeler(parser(html));
-
-    assert_char_encoding_independence(html, labeler);
-
-    annotate(html, labels)
-}
-
-fn annotate(html: &str, labels: Vec<(Range<usize>, impl AsRef<str>)>) -> String {
-    let mut files = SimpleFiles::new();
-    let file_id = files.add("test.html", html);
-
-    let diagnostic = Diagnostic::note().with_labels(
-        labels
-            .into_iter()
-            .map(|(span, text)| Label::primary(file_id, span).with_message(text.as_ref()))
-            .collect(),
-    );
-
-    let mut writer = Buffer::no_color();
-    let config = codespan_reporting::term::Config::default();
-    term::emit(&mut writer, &config, &files, &diagnostic).unwrap();
-    let msg = std::str::from_utf8(writer.as_slice()).unwrap();
-
-    // strip the filename and the line numbers since we don't need them
-    // (apparently they cannot be disabled in codespan_reporting)
-    msg.lines()
-        .skip(3)
-        .flat_map(|l| l.split_once("│ ").map(|s| format!("{}\n", s.1.trim_end())))
-        .collect::<Vec<_>>()
-        .join("")
-}
-
-#[test]
-fn char_span() {
-    let html = "X &amp; &doesntexist; &#1123; </";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        for token_trace in parser.flatten() {
-            if let (Token::Char(c), Trace::Char(span)) = token_trace {
-                if c != ' ' {
-                    labels.push((span, ""));
-                }
-            }
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    X &amp; &doesntexist; &#1123; </
-    ^ ^^^^^ ^^^^^^^^^^^^^ ^^^^^^^ ^^
-    "###);
-}
-
-#[test]
-fn start_tag_span() {
-    let html = "<x> <xyz> <xyz  > <xyz/>";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        for (_, trace) in parser.flatten() {
-            if let Trace::StartTag(trace) = trace {
-                labels.push((trace.span, ""));
-            }
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    <x> <xyz> <xyz  > <xyz/>
-    ^^^ ^^^^^ ^^^^^^^ ^^^^^^
-    "###);
-}
-
-#[test]
-fn end_tag_span() {
-    let html = "</x> </xyz> </xyz  > </xyz/>";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        for (_, trace) in parser.flatten() {
-            if let Trace::EndTag(trace) = trace {
-                labels.push((trace.span, ""));
-            }
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    </x> </xyz> </xyz  > </xyz/>
-    ^^^^ ^^^^^^ ^^^^^^^^ ^^^^^^^
-    "###);
-}
-
-#[test]
-fn start_tag_name_span() {
-    let html = "<x> <xyz> <xyz  > <xyz/>";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        for (_, trace) in parser.flatten() {
-            if let Trace::StartTag(trace) = trace {
-                labels.push((trace.name_span, ""));
-            }
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    <x> <xyz> <xyz  > <xyz/>
-     ^   ^^^   ^^^     ^^^
-    "###);
-}
-
-#[test]
-fn end_tag_name_span() {
-    let html = "</x> </xyz> </xyz  > </xyz/>";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        for (_, trace) in parser.flatten() {
-            if let Trace::EndTag(trace) = trace {
-                labels.push((trace.name_span, ""));
-            }
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    </x> </xyz> </xyz  > </xyz/>
-      ^    ^^^    ^^^      ^^^
-    "###);
-}
-
-#[test]
-fn attribute_name_span() {
-    let html = "<test x xyz y=VAL xy=VAL z = VAL yzx = VAL>";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        let (Token::StartTag(tag), Trace::StartTag(trace)) = parser.flatten().next().unwrap()
-        else {
-            panic!("expected start tag")
-        };
-        for attr in &tag.attributes {
-            labels.push((
-                trace.attribute_traces[attr.trace_idx().unwrap()].name_span(),
-                "",
-            ));
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    <test x xyz y=VAL xy=VAL z = VAL yzx = VAL>
-          ^ ^^^ ^     ^^     ^       ^^^
-    "###);
-}
-
-#[test]
-fn attribute_value_span() {
-    let html = "<test x=unquoted y = unquoted z='single-quoted' zz=\"double-quoted\" empty=''>";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        let (Token::StartTag(tag), Trace::StartTag(trace)) = parser.flatten().next().unwrap()
-        else {
-            panic!("expected start tag")
-        };
-        for attr in &tag.attributes {
-            labels.push((
-                trace.attribute_traces[attr.trace_idx().unwrap()]
-                    .value_span()
-                    .unwrap(),
-                "",
-            ));
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    <test x=unquoted y = unquoted z='single-quoted' zz="double-quoted" empty=''>
-            ^^^^^^^^     ^^^^^^^^    ^^^^^^^^^^^^^      ^^^^^^^^^^^^^         ^
-    "###);
-}
-
-#[test]
-fn attribute_value_with_char_ref() {
-    let html = "<test x=&amp; y='&amp;' z=\"&amp;\">";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        let (Token::StartTag(tag), Trace::StartTag(trace)) = parser.flatten().next().unwrap()
-        else {
-            panic!("expected start tag")
-        };
-        for attr in &tag.attributes {
-            labels.push((
-                trace.attribute_traces[attr.trace_idx().unwrap()]
-                    .value_span()
-                    .unwrap(),
-                "",
-            ));
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    <test x=&amp; y='&amp;' z="&amp;">
-            ^^^^^    ^^^^^     ^^^^^
-    "###);
-}
-
-#[test]
-fn comment_data_span() {
-    #[rustfmt::skip]
-    let cases = [
-        "<!-- Why are you looking at the source code? -->",
-        "<!-- Why are you looking at the source code? --",
-        "<!-- Why are you looking at the source code? -",
-        "<!-- Why are you looking at the source code?",
-        "<!--",
-        "<!-->",
-        "<!---",
-        "<!--->",
-        "<!-- Why are you looking at the source code? ->",
-        "<!-- Why are you looking at the source code? --!>",
-        "<!-- Why are you looking at the source code? --!",
-
-        // bogus comments
-        "<! Why are you looking at the source code? -->",
-        "<!",
-    ];
-
-    let mut annotated = String::new();
-    for case in cases {
-        let labeler = |parser: Parser| {
-            let (_, Trace::Comment(comment)) = parser.flatten().next().unwrap() else {
-                panic!("expected comment");
-            };
-            vec![(comment.data_span, "")]
-        };
-
-        annotated.push_str(&test_and_annotate(case, labeler));
-    }
-
-    assert_snapshot!(annotated, @r###"
-    <!-- Why are you looking at the source code? -->
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <!-- Why are you looking at the source code? --
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <!-- Why are you looking at the source code? -
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <!-- Why are you looking at the source code?
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <!--
-        ^
-    <!-->
-        ^
-    <!---
-        ^
-    <!--->
-        ^
-    <!-- Why are you looking at the source code? ->
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <!-- Why are you looking at the source code? --!>
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <!-- Why are you looking at the source code? --!
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <! Why are you looking at the source code? -->
-      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    <!
-      ^
-    "###);
-
-    for (idx, case) in cases.iter().enumerate() {
-        let (Token::Comment(data), Trace::Comment(trace)) = parser(*case).flatten().next().unwrap()
-        else {
-            panic!("expected comment");
-        };
-        assert_eq!(case[trace.data_span], data, "case {idx}");
-    }
-}
-
-#[test]
-fn doctype_span() {
-    #[rustfmt::skip]
-    let cases = [
-        r#"<!DOCTYPE       HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"     >"#,
-    ];
-
-    let mut annotated = String::new();
-    for case in cases {
-        let labeler = |parser: Parser| {
-            let (_, Trace::Doctype(trace)) = parser.flatten().next().unwrap() else {
-                panic!("expected doctype");
-            };
-            vec![(trace.span(), "")]
-        };
-        annotated.push_str(&test_and_annotate(case, labeler));
-    }
-
-    assert_snapshot!(annotated, @r###"
-    <!DOCTYPE       HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"     >
-    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    "###);
-}
-
-#[test]
-fn doctype_id_spans() {
-    #[rustfmt::skip]
-    let cases = [
-        r#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">"#,
-    ];
-
-    let mut annotated = String::new();
-    for case in cases {
-        let labeler = |parser: Parser| {
-            let (_, Trace::Doctype(trace)) = parser.flatten().next().unwrap() else {
-                panic!("expected doctype");
-            };
-
-            let mut labels = Vec::new();
-            if let Some(name_span) = trace.name_span() {
-                labels.push((name_span, "name"));
-            }
-            if let Some(public_id_span) = trace.public_id_span() {
-                labels.push((public_id_span, "public id"));
-            }
-            if let Some(system_id_span) = trace.system_id_span() {
-                labels.push((system_id_span, "system id"));
-            }
-            labels
-        };
-
-        annotated.push_str(&test_and_annotate(case, labeler));
-    }
-
-    assert_snapshot!(annotated, @r###"
-    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-              ^^^^         ^^^^^^^^^^^^^^^^^^^^^^^^^   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ system id
-              │            │
-              │            public id
-              name
-    "###);
-}
-
-#[test]
-fn eof_offset() {
-    let html = "Where does it end?";
-    let labeler = |parser: Parser| {
-        let mut labels = Vec::new();
-        for (_, trace) in parser.flatten() {
-            if let Trace::EndOfFile(offset) = trace {
-                labels.push((offset..offset, "here"));
-            }
-        }
-        labels
-    };
-    assert_snapshot!(test_and_annotate(html, labeler), @r###"
-    Where does it end?
-                      ^ here
-    "###);
-}
-
-fn annotate_errors(html: &'static str) -> String {
-    let mut parser = parser(html);
-    for _ in parser.by_ref() {}
-    let errors: Vec<_> = parser.emitter_mut().drain_errors().collect();
-
-    for (_, span) in errors {
-        if span.start == span.end {
-            if span.start != html.len() {
-                panic!("empty error spans are only allowed at the very end of the source (for eof errors)");
-            }
-        } else {
-            assert!(span.start < span.end);
-            assert!(span.end <= html.len());
-        }
-    }
-
-    let labeler = |mut parser: Parser| {
-        let mut labels = Vec::new();
-        for _ in parser.by_ref() {}
-        for (error, span) in parser.emitter_mut().drain_errors() {
-            labels.push((span, error.code()));
-        }
-        labels
-    };
-
-    test_and_annotate(html, labeler)
-}
-
-#[test]
-fn tests_for_errors_are_sorted() {
-    let source_of_this_file = std::fs::read_to_string(file!()).unwrap();
-    let mut error_tests: Vec<_> = source_of_this_file
-        .lines()
-        .filter(|l| l.starts_with("fn error_"))
-        .collect();
-    let error_tests_found_order = error_tests.join("\n");
-    error_tests.sort();
-    let error_tests_sorted = error_tests.join("\n");
-    assert_eq!(error_tests_found_order, error_tests_sorted);
-}
-
-#[test]
-fn error_char_ref_absence_of_digits() {
-    let html = "&#qux;";
-    assert_snapshot!(annotate_errors(html), @r###"
-    &#qux;
-    ^^^ absence-of-digits-in-numeric-character-reference
-    "###);
-}
-
-#[test]
-fn error_char_ref_control_char() {
-    let html = "&#127;";
-    assert_snapshot!(annotate_errors(html), @r###"
-    &#127;
-    ^^^^^^ control-character-reference
-    "###);
-}
-
-#[test]
-fn error_char_ref_missing_semicolon() {
-    let html = "&not";
-    assert_snapshot!(annotate_errors(html), @r###"
-    &not
-        ^ missing-semicolon-after-character-reference
-    "###);
-}
-
-#[test]
-fn error_char_ref_noncharacter() {
-    let html = "&#xFDD0;";
-    assert_snapshot!(annotate_errors(html), @r###"
-    &#xFDD0;
-    ^^^^^^^^ noncharacter-character-reference
-    "###);
-}
-
-#[test]
-fn error_char_ref_null_char() {
-    let html = "&#0;";
-    assert_snapshot!(annotate_errors(html), @r###"
-    &#0;
-    ^^^^ null-character-reference
-    "###);
-}
-
-#[test]
-fn error_char_ref_outside_unicode_range() {
-    let html = "&#9999999;";
-    assert_snapshot!(annotate_errors(html), @r###"
-    &#9999999;
-    ^^^^^^^^^^ character-reference-outside-unicode-range
-    "###);
-}
-
-#[test]
-fn error_char_ref_surrogate() {
-    let html = "&#xD800;";
-    assert_snapshot!(annotate_errors(html), @r###"
-    &#xD800;
-    ^^^^^^^^ surrogate-character-reference
-    "###);
-}
-
-#[test]
-fn error_char_ref_unknown_named() {
-    let html = "The pirate says &arrrrr;";
-    assert_snapshot!(annotate_errors(html), @r###"
-    The pirate says &arrrrr;
-                    ^^^^^^^^ unknown-named-character-reference
-    "###);
-}
-
-#[test]
-fn error_duplicate_attribute() {
-    let html = "Does this open two pages? <a href=foo.html href=bar.html>";
-    assert_snapshot!(annotate_errors(html), @r###"
-    Does this open two pages? <a href=foo.html href=bar.html>
-                                               ^^^^ duplicate-attribute
-    "###);
-}
-
-#[test]
-fn error_end_tag_with_attributes() {
-    let html = "</end-tag first second=value>";
-    assert_snapshot!(annotate_errors(html), @r###"
-    </end-tag first second=value>
-                    ^^^^^^ end-tag-with-attributes
-    "###);
-}
-
-#[test]
-fn error_end_tag_with_trailing_solidus() {
-    let html = "Do you start or do you end? </yes/>";
-    assert_snapshot!(annotate_errors(html), @r###"
-    Do you start or do you end? </yes/>
-                                     ^ end-tag-with-trailing-solidus
-    "###);
-}
-
-#[test]
-fn error_eof_before_tag_name() {
-    let html = "<";
-    assert_snapshot!(annotate_errors(html), @r###"
-    <
-     ^ eof-before-tag-name
-    "###);
-}
-
-// TODO: add error_eof_in_cdata test
-// blocked by lack of proper tree constructor (NaiveParser doesn't parse CDATA sections)
-
-#[test]
-fn error_eof_in_comment() {
-    let html = "<!--";
-    assert_snapshot!(annotate_errors(html), @r###"
-    <!--
-        ^ eof-in-comment
-    "###);
-}
-
-#[test]
-fn error_eof_in_doctype() {
-    let html = "<!doctype html";
-    assert_snapshot!(annotate_errors(html), @r###"
-    <!doctype html
-                  ^ eof-in-doctype
-    "###);
-}
-
-#[test]
-fn error_eof_in_script_html_comment_like_text() {
-    let html = "<script><!--";
-    assert_snapshot!(annotate_errors(html), @r###"
-    <script><!--
-                ^ eof-in-script-html-comment-like-text
-    "###);
-}
-
-#[test]
-fn error_eof_in_tag() {
-    let html = "</sarcasm";
-    assert_snapshot!(annotate_errors(html), @r###"
-    </sarcasm
-             ^ eof-in-tag
-    "###);
-}
-
-#[test]
-fn error_invalid_first_character_of_tag_name() {
-    let html = "Please mind the gap: < test";
-    assert_snapshot!(annotate_errors(html), @r###"
-    Please mind the gap: < test
-                          ^ invalid-first-character-of-tag-name
-    "###);
-}
-
-fn assert_char_encoding_independence<S: AsRef<str> + Clone>(
-    html: &'static str,
-    labeler: impl Fn(Parser) -> Vec<(Range<usize>, S)>,
-) {
-    let utf8_labels = labeler(parser(html));
-    let utf16_labels = labeler(parser(Utf16Reader(html.into_reader())));
-
-    for (idx, (span, _)) in utf16_labels.into_iter().enumerate() {
-        let expected_utf16_span = Range {
-            start: html[..utf8_labels[idx].0.start].encode_utf16().count() * 2,
-            end: html[..utf8_labels[idx].0.end].encode_utf16().count() * 2,
-        };
-        assert_eq!(
-            span,
-            expected_utf16_span,
-            "UTF-16 span didn't match the UTF-8 span, which looks like:\n{}",
-            annotate(html, vec![utf8_labels[idx].clone()])
-        );
-    }
-}
-
-struct Utf16Reader<'a>(html5tokenizer::reader::StringReader<'a>);
-
-impl html5tokenizer::reader::Reader for Utf16Reader<'_> {
-    type Error = std::convert::Infallible;
-
-    fn read_char(&mut self) -> Result<Option<char>, Self::Error> {
-        self.0.read_char()
-    }
-
-    fn try_read_string(&mut self, s: &str, case_sensitive: bool) -> Result<bool, Self::Error> {
-        self.0.try_read_string(s, case_sensitive)
-    }
-
-    fn len_of_char_in_current_encoding(&self, c: char) -> usize {
-        c.len_utf16() * 2
-    }
-}