diff options
Diffstat (limited to 'tests/test_spans.rs')
-rw-r--r-- | tests/test_spans.rs | 626 |
1 files changed, 0 insertions, 626 deletions
diff --git a/tests/test_spans.rs b/tests/test_spans.rs deleted file mode 100644 index b10808c..0000000 --- a/tests/test_spans.rs +++ /dev/null @@ -1,626 +0,0 @@ -use std::convert::Infallible; -use std::ops::Range; - -use codespan_reporting::{ - self, - diagnostic::{Diagnostic, Label}, - files::SimpleFiles, - term::{self, termcolor::Buffer}, -}; -use html5tokenizer::{ - offset::PosTrackingReader, - reader::{IntoReader, Reader}, - trace::Trace, - NaiveParser, Token, -}; -use insta::assert_snapshot; -use similar_asserts::assert_eq; - -/// Just a convenient type alias for labeler closures since Rust -/// apparently cannot infer the type (requiring an annotation). -type Parser = NaiveParser< - PosTrackingReader<Box<dyn Reader<Error = Infallible>>>, - usize, - html5tokenizer::TracingEmitter, ->; - -fn parser<R>(reader: impl IntoReader<'static, Reader = R>) -> Parser -where - R: Reader<Error = Infallible> + 'static, -{ - NaiveParser::new_with_emitter( - PosTrackingReader::new( - Box::new(reader.into_reader()) as Box<dyn Reader<Error = Infallible>> - ), - html5tokenizer::TracingEmitter::default(), - ) -} - -fn test_and_annotate<S: AsRef<str> + Clone>( - html: &'static str, - labeler: impl Fn(Parser) -> Vec<(Range<usize>, S)>, -) -> String { - let labels = labeler(parser(html)); - - assert_char_encoding_independence(html, labeler); - - annotate(html, labels) -} - -fn annotate(html: &str, labels: Vec<(Range<usize>, impl AsRef<str>)>) -> String { - let mut files = SimpleFiles::new(); - let file_id = files.add("test.html", html); - - let diagnostic = Diagnostic::note().with_labels( - labels - .into_iter() - .map(|(span, text)| Label::primary(file_id, span).with_message(text.as_ref())) - .collect(), - ); - - let mut writer = Buffer::no_color(); - let config = codespan_reporting::term::Config::default(); - term::emit(&mut writer, &config, &files, &diagnostic).unwrap(); - let msg = std::str::from_utf8(writer.as_slice()).unwrap(); - - // strip the filename and the line numbers since we don't need them - // (apparently they cannot be disabled in codespan_reporting) - msg.lines() - .skip(3) - .flat_map(|l| l.split_once("│ ").map(|s| format!("{}\n", s.1.trim_end()))) - .collect::<Vec<_>>() - .join("") -} - -#[test] -fn char_span() { - let html = "X & &doesntexist; ѣ </"; - let labeler = |parser: Parser| { - let mut labels = Vec::new(); - for token_trace in parser.flatten() { - if let (Token::Char(c), Trace::Char(span)) = token_trace { - if c != ' ' { - labels.push((span, "")); - } - } - } - labels - }; - assert_snapshot!(test_and_annotate(html, labeler), @r###" - X & &doesntexist; ѣ </ - ^ ^^^^^ ^^^^^^^^^^^^^ ^^^^^^^ ^^ - "###); -} - -#[test] -fn start_tag_span() { - let html = "<x> <xyz> <xyz > <xyz/>"; - let labeler = |parser: Parser| { - let mut labels = Vec::new(); - for (_, trace) in parser.flatten() { - if let Trace::StartTag(trace) = trace { - labels.push((trace.span, "")); - } - } - labels - }; - assert_snapshot!(test_and_annotate(html, labeler), @r###" - <x> <xyz> <xyz > <xyz/> - ^^^ ^^^^^ ^^^^^^^ ^^^^^^ - "###); -} - -#[test] -fn end_tag_span() { - let html = "</x> </xyz> </xyz > </xyz/>"; - let labeler = |parser: Parser| { - let mut labels = Vec::new(); - for (_, trace) in parser.flatten() { - if let Trace::EndTag(trace) = trace { - labels.push((trace.span, "")); - } - } - labels - }; - assert_snapshot!(test_and_annotate(html, labeler), @r###" - </x> </xyz> </xyz > </xyz/> - ^^^^ ^^^^^^ ^^^^^^^^ ^^^^^^^ - "###); -} - -#[test] -fn start_tag_name_span() { - let html = "<x> <xyz> <xyz > <xyz/>"; - let labeler = |parser: Parser| { - let mut labels = Vec::new(); - for (_, trace) in parser.flatten() { - if let Trace::StartTag(trace) = trace { - labels.push((trace.name_span, "")); - } - } - labels - }; - assert_snapshot!(test_and_annotate(html, labeler), @r###" - <x> <xyz> <xyz > <xyz/> - ^ ^^^ ^^^ ^^^ - "###); -} - -#[test] -fn end_tag_name_span() { - let html = "</x> </xyz> </xyz > </xyz/>"; - let labeler = |parser: Parser| { - let mut labels = Vec::new(); - for (_, trace) in parser.flatten() { - if let Trace::EndTag(trace) = trace { - labels.push((trace.name_span, "")); - } - } - labels - }; - assert_snapshot!(test_and_annotate(html, labeler), @r###" - </x> </xyz> </xyz > </xyz/> - ^ ^^^ ^^^ ^^^ - "###); -} - -#[test] -fn attribute_name_span() { - let html = "<test x xyz y=VAL xy=VAL z = VAL yzx = VAL>"; - let labeler = |parser: Parser| { - let mut labels = Vec::new(); - let (Token::StartTag(tag), Trace::StartTag(trace)) = parser.flatten().next().unwrap() - else { - panic!("expected start tag") - }; - for attr in &tag.attributes { - labels.push(( - trace.attribute_traces[attr.trace_idx().unwrap()].name_span(), - "", - )); - } - labels - }; - assert_snapshot!(test_and_annotate(html, labeler), @r###" - <test x xyz y=VAL xy=VAL z = VAL yzx = VAL> - ^ ^^^ ^ ^^ ^ ^^^ - "###); -} - -#[test] -fn attribute_value_span() { - let html = "<test x=unquoted y = unquoted z='single-quoted' zz=\"double-quoted\" empty=''>"; - let labeler = |parser: Parser| { - let mut labels = Vec::new(); - let (Token::StartTag(tag), Trace::StartTag(trace)) = parser.flatten().next().unwrap() - else { - panic!("expected start tag") - }; - for attr in &tag.attributes { - labels.push(( - trace.attribute_traces[attr.trace_idx().unwrap()] - .value_span() - .unwrap(), - "", - )); - } - labels - }; - assert_snapshot!(test_and_annotate(html, labeler), @r###" - <test x=unquoted y = unquoted z='single-quoted' zz="double-quoted" empty=''> - ^^^^^^^^ ^^^^^^^^ ^^^^^^^^^^^^^ ^^^^^^^^^^^^^ ^ - "###); -} - -#[test] -fn attribute_value_with_char_ref() { - let html = "<test x=& y='&' z=\"&\">"; - let labeler = |parser: Parser| { - let mut labels = Vec::new(); - let (Token::StartTag(tag), Trace::StartTag(trace)) = parser.flatten().next().unwrap() - else { - panic!("expected start tag") - }; - for attr in &tag.attributes { - labels.push(( - trace.attribute_traces[attr.trace_idx().unwrap()] - .value_span() - .unwrap(), - "", - )); - } - labels - }; - assert_snapshot!(test_and_annotate(html, labeler), @r###" - <test x=& y='&' z="&"> - ^^^^^ ^^^^^ ^^^^^ - "###); -} - -#[test] -fn comment_data_span() { - #[rustfmt::skip] - let cases = [ - "<!-- Why are you looking at the source code? -->", - "<!-- Why are you looking at the source code? --", - "<!-- Why are you looking at the source code? -", - "<!-- Why are you looking at the source code?", - "<!--", - "<!-->", - "<!---", - "<!--->", - "<!-- Why are you looking at the source code? ->", - "<!-- Why are you looking at the source code? --!>", - "<!-- Why are you looking at the source code? --!", - - // bogus comments - "<! Why are you looking at the source code? -->", - "<!", - ]; - - let mut annotated = String::new(); - for case in cases { - let labeler = |parser: Parser| { - let (_, Trace::Comment(comment)) = parser.flatten().next().unwrap() else { - panic!("expected comment"); - }; - vec![(comment.data_span, "")] - }; - - annotated.push_str(&test_and_annotate(case, labeler)); - } - - assert_snapshot!(annotated, @r###" - <!-- Why are you looking at the source code? --> - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - <!-- Why are you looking at the source code? -- - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - <!-- Why are you looking at the source code? - - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - <!-- Why are you looking at the source code? - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - <!-- - ^ - <!--> - ^ - <!--- - ^ - <!---> - ^ - <!-- Why are you looking at the source code? -> - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - <!-- Why are you looking at the source code? --!> - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - <!-- Why are you looking at the source code? --! - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - <! Why are you looking at the source code? --> - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - <! - ^ - "###); - - for (idx, case) in cases.iter().enumerate() { - let (Token::Comment(data), Trace::Comment(trace)) = parser(*case).flatten().next().unwrap() - else { - panic!("expected comment"); - }; - assert_eq!(case[trace.data_span], data, "case {idx}"); - } -} - -#[test] -fn doctype_span() { - #[rustfmt::skip] - let cases = [ - r#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd" >"#, - ]; - - let mut annotated = String::new(); - for case in cases { - let labeler = |parser: Parser| { - let (_, Trace::Doctype(trace)) = parser.flatten().next().unwrap() else { - panic!("expected doctype"); - }; - vec![(trace.span(), "")] - }; - annotated.push_str(&test_and_annotate(case, labeler)); - } - - assert_snapshot!(annotated, @r###" - <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd" > - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - "###); -} - -#[test] -fn doctype_id_spans() { - #[rustfmt::skip] - let cases = [ - r#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">"#, - ]; - - let mut annotated = String::new(); - for case in cases { - let labeler = |parser: Parser| { - let (_, Trace::Doctype(trace)) = parser.flatten().next().unwrap() else { - panic!("expected doctype"); - }; - - let mut labels = Vec::new(); - if let Some(name_span) = trace.name_span() { - labels.push((name_span, "name")); - } - if let Some(public_id_span) = trace.public_id_span() { - labels.push((public_id_span, "public id")); - } - if let Some(system_id_span) = trace.system_id_span() { - labels.push((system_id_span, "system id")); - } - labels - }; - - annotated.push_str(&test_and_annotate(case, labeler)); - } - - assert_snapshot!(annotated, @r###" - <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> - ^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ system id - │ │ - │ public id - name - "###); -} - -#[test] -fn eof_offset() { - let html = "Where does it end?"; - let labeler = |parser: Parser| { - let mut labels = Vec::new(); - for (_, trace) in parser.flatten() { - if let Trace::EndOfFile(offset) = trace { - labels.push((offset..offset, "here")); - } - } - labels - }; - assert_snapshot!(test_and_annotate(html, labeler), @r###" - Where does it end? - ^ here - "###); -} - -fn annotate_errors(html: &'static str) -> String { - let mut parser = parser(html); - for _ in parser.by_ref() {} - let errors: Vec<_> = parser.emitter_mut().drain_errors().collect(); - - for (_, span) in errors { - if span.start == span.end { - if span.start != html.len() { - panic!("empty error spans are only allowed at the very end of the source (for eof errors)"); - } - } else { - assert!(span.start < span.end); - assert!(span.end <= html.len()); - } - } - - let labeler = |mut parser: Parser| { - let mut labels = Vec::new(); - for _ in parser.by_ref() {} - for (error, span) in parser.emitter_mut().drain_errors() { - labels.push((span, error.code())); - } - labels - }; - - test_and_annotate(html, labeler) -} - -#[test] -fn tests_for_errors_are_sorted() { - let source_of_this_file = std::fs::read_to_string(file!()).unwrap(); - let mut error_tests: Vec<_> = source_of_this_file - .lines() - .filter(|l| l.starts_with("fn error_")) - .collect(); - let error_tests_found_order = error_tests.join("\n"); - error_tests.sort(); - let error_tests_sorted = error_tests.join("\n"); - assert_eq!(error_tests_found_order, error_tests_sorted); -} - -#[test] -fn error_char_ref_absence_of_digits() { - let html = "&#qux;"; - assert_snapshot!(annotate_errors(html), @r###" - &#qux; - ^^^ absence-of-digits-in-numeric-character-reference - "###); -} - -#[test] -fn error_char_ref_control_char() { - let html = ""; - assert_snapshot!(annotate_errors(html), @r###" -  - ^^^^^^ control-character-reference - "###); -} - -#[test] -fn error_char_ref_missing_semicolon() { - let html = "¬"; - assert_snapshot!(annotate_errors(html), @r###" - ¬ - ^ missing-semicolon-after-character-reference - "###); -} - -#[test] -fn error_char_ref_noncharacter() { - let html = ""; - assert_snapshot!(annotate_errors(html), @r###" -  - ^^^^^^^^ noncharacter-character-reference - "###); -} - -#[test] -fn error_char_ref_null_char() { - let html = "�"; - assert_snapshot!(annotate_errors(html), @r###" - � - ^^^^ null-character-reference - "###); -} - -#[test] -fn error_char_ref_outside_unicode_range() { - let html = "�"; - assert_snapshot!(annotate_errors(html), @r###" - � - ^^^^^^^^^^ character-reference-outside-unicode-range - "###); -} - -#[test] -fn error_char_ref_surrogate() { - let html = "�"; - assert_snapshot!(annotate_errors(html), @r###" - � - ^^^^^^^^ surrogate-character-reference - "###); -} - -#[test] -fn error_char_ref_unknown_named() { - let html = "The pirate says &arrrrr;"; - assert_snapshot!(annotate_errors(html), @r###" - The pirate says &arrrrr; - ^^^^^^^^ unknown-named-character-reference - "###); -} - -#[test] -fn error_duplicate_attribute() { - let html = "Does this open two pages? <a href=foo.html href=bar.html>"; - assert_snapshot!(annotate_errors(html), @r###" - Does this open two pages? <a href=foo.html href=bar.html> - ^^^^ duplicate-attribute - "###); -} - -#[test] -fn error_end_tag_with_attributes() { - let html = "</end-tag first second=value>"; - assert_snapshot!(annotate_errors(html), @r###" - </end-tag first second=value> - ^^^^^^ end-tag-with-attributes - "###); -} - -#[test] -fn error_end_tag_with_trailing_solidus() { - let html = "Do you start or do you end? </yes/>"; - assert_snapshot!(annotate_errors(html), @r###" - Do you start or do you end? </yes/> - ^ end-tag-with-trailing-solidus - "###); -} - -#[test] -fn error_eof_before_tag_name() { - let html = "<"; - assert_snapshot!(annotate_errors(html), @r###" - < - ^ eof-before-tag-name - "###); -} - -// TODO: add error_eof_in_cdata test -// blocked by lack of proper tree constructor (NaiveParser doesn't parse CDATA sections) - -#[test] -fn error_eof_in_comment() { - let html = "<!--"; - assert_snapshot!(annotate_errors(html), @r###" - <!-- - ^ eof-in-comment - "###); -} - -#[test] -fn error_eof_in_doctype() { - let html = "<!doctype html"; - assert_snapshot!(annotate_errors(html), @r###" - <!doctype html - ^ eof-in-doctype - "###); -} - -#[test] -fn error_eof_in_script_html_comment_like_text() { - let html = "<script><!--"; - assert_snapshot!(annotate_errors(html), @r###" - <script><!-- - ^ eof-in-script-html-comment-like-text - "###); -} - -#[test] -fn error_eof_in_tag() { - let html = "</sarcasm"; - assert_snapshot!(annotate_errors(html), @r###" - </sarcasm - ^ eof-in-tag - "###); -} - -#[test] -fn error_invalid_first_character_of_tag_name() { - let html = "Please mind the gap: < test"; - assert_snapshot!(annotate_errors(html), @r###" - Please mind the gap: < test - ^ invalid-first-character-of-tag-name - "###); -} - -fn assert_char_encoding_independence<S: AsRef<str> + Clone>( - html: &'static str, - labeler: impl Fn(Parser) -> Vec<(Range<usize>, S)>, -) { - let utf8_labels = labeler(parser(html)); - let utf16_labels = labeler(parser(Utf16Reader(html.into_reader()))); - - for (idx, (span, _)) in utf16_labels.into_iter().enumerate() { - let expected_utf16_span = Range { - start: html[..utf8_labels[idx].0.start].encode_utf16().count() * 2, - end: html[..utf8_labels[idx].0.end].encode_utf16().count() * 2, - }; - assert_eq!( - span, - expected_utf16_span, - "UTF-16 span didn't match the UTF-8 span, which looks like:\n{}", - annotate(html, vec![utf8_labels[idx].clone()]) - ); - } -} - -struct Utf16Reader<'a>(html5tokenizer::reader::StringReader<'a>); - -impl html5tokenizer::reader::Reader for Utf16Reader<'_> { - type Error = std::convert::Infallible; - - fn read_char(&mut self) -> Result<Option<char>, Self::Error> { - self.0.read_char() - } - - fn try_read_string(&mut self, s: &str, case_sensitive: bool) -> Result<bool, Self::Error> { - self.0.try_read_string(s, case_sensitive) - } - - fn len_of_char_in_current_encoding(&self, c: char) -> usize { - c.len_utf16() * 2 - } -} |