//! Span-accuracy tests for `html5tokenizer`: each test tokenizes a small HTML
//! snippet with a position-tracking reader and snapshots a codespan-style
//! annotation of the spans the tokenizer reports (tags, names, attributes,
//! comments, doctypes, and parse errors).
//
// NOTE(review): this copy of the file appears to have been mangled by an
// HTML-stripping step — generic argument lists (`Iterator<Item = …>`,
// `Range<usize>`, `AsRef<str>`, `collect::<Vec<_>>`) and the tag content of
// every `html` string literal are missing. Verify all literals and types
// against version control before trusting the code or snapshots below.

use std::ops::Range;
use codespan_reporting::{
    self,
    diagnostic::{Diagnostic, Label},
    files::SimpleFiles,
    term::{self, termcolor::Buffer},
};
use html5tokenizer::{offset::PosTrackingReader, NaiveParser, Token};
use insta::assert_snapshot;
use similar_asserts::assert_eq;

/// Builds a parser over `html` whose reader tracks byte offsets, and flattens
/// the per-step results so the iterator yields tokens directly.
// NOTE(review): return type reads `impl Iterator>` here — truncated by the
// mangling described above; presumably `impl Iterator<Item = Token<…>>`.
fn tokenizer(html: &'static str) -> impl Iterator> {
    NaiveParser::new(PosTrackingReader::new(html)).flatten()
}

/// Renders `labels` (byte-span + message pairs) as a `codespan_reporting`
/// diagnostic against `html`, then returns only the source/caret lines so
/// snapshots stay stable.
fn annotate(html: &str, labels: Vec<(Range, impl AsRef)>) -> String {
    let mut files = SimpleFiles::new();
    let file_id = files.add("test.html", html);
    let diagnostic = Diagnostic::note().with_labels(
        labels
            .into_iter()
            .map(|(span, text)| Label::primary(file_id, span).with_message(text.as_ref()))
            .collect(),
    );
    let mut writer = Buffer::no_color();
    let config = codespan_reporting::term::Config::default();
    term::emit(&mut writer, &config, &files, &diagnostic).unwrap();
    let msg = std::str::from_utf8(writer.as_slice()).unwrap();
    // strip the filename and the line numbers since we don't need them
    // (apparently they cannot be disabled in codespan_reporting)
    msg.lines()
        .skip(3)
        .flat_map(|l| l.split_once("│ ").map(|s| s.1.trim_end()))
        .collect::>()
        .join("\n")
}

/// `StartTag::span` should cover each whole start tag.
#[test]
fn start_tag_span() {
    let html = " ";
    let mut labels = Vec::new();
    for token in tokenizer(html) {
        if let Token::StartTag(tag) = token {
            labels.push((tag.span, ""));
        }
    }
    assert_snapshot!(annotate(html, labels), @r###" ^^^ ^^^^^ ^^^^^^^ ^^^^^^ "###);
}

/// `EndTag::span` should cover each whole end tag.
#[test]
fn end_tag_span() {
    let html = " ";
    let mut labels = Vec::new();
    for token in tokenizer(html) {
        if let Token::EndTag(tag) = token {
            labels.push((tag.span, ""));
        }
    }
    assert_snapshot!(annotate(html, labels), @r###" ^^^^ ^^^^^^ ^^^^^^^^ ^^^^^^^ "###);
}

/// `name_span()` on a start tag should cover only the tag name.
#[test]
fn start_tag_name_span() {
    let html = " ";
    let mut labels = Vec::new();
    for token in tokenizer(html) {
        if let Token::StartTag(tag) = token {
            labels.push((tag.name_span(), ""));
        }
    }
    assert_snapshot!(annotate(html, labels), @r###" ^ ^^^ ^^^ ^^^ "###);
}

/// `name_span()` on an end tag should cover only the tag name.
#[test]
fn end_tag_name_span() {
    let html = " ";
    let mut labels = Vec::new();
    for token in tokenizer(html) {
        if let Token::EndTag(tag) = token {
            labels.push((tag.name_span(), ""));
        }
    }
    assert_snapshot!(annotate(html, labels), @r###" ^ ^^^ ^^^ ^^^ "###);
}

/// `Attribute::name_span()` should cover each attribute name, including
/// valueless attributes.
#[test]
fn attribute_name_span() {
    let html = "";
    let mut labels = Vec::new();
    let Token::StartTag(tag) = tokenizer(html).next().unwrap() else { panic!("expected start tag") };
    for attr in &tag.attributes {
        labels.push((attr.name_span(), ""));
    }
    assert_snapshot!(annotate(html, labels), @r###" ^ ^^^ ^ ^^ ^ ^^^ "###);
}

/// `Attribute::value_span()` should cover each attribute value regardless of
/// quoting style.
#[test]
fn attribute_value_span() {
    let html = "";
    let mut labels = Vec::new();
    let Token::StartTag(tag) = tokenizer(html).next().unwrap() else { panic!("expected start tag") };
    for attr in &tag.attributes {
        labels.push((attr.value_span().unwrap(), ""));
    }
    assert_snapshot!(annotate(html, labels), @r###" ^^^^^^^^ ^^^^^^^^ ^^^^^^^^^^^^^ ^^^^^^^^^^^^^ ^ "###);
}

/// `value_span()` should report the span of the source text even when the
/// value contains a character reference (whose decoded form differs in length).
#[test]
fn attribute_value_with_char_ref() {
    let html = "";
    let mut labels = Vec::new();
    let Token::StartTag(tag) = tokenizer(html).next().unwrap() else { panic!("expected start tag") };
    for attr in &tag.attributes {
        labels.push((attr.value_span().unwrap(), ""));
    }
    assert_snapshot!(annotate(html, labels), @r###" ^^^^^ ^^^^^ ^^^^^ "###);
}

/// For a well-formed comment, `data_span()` must slice exactly the bytes that
/// equal `comment.data`.
#[test]
fn comment_proper_data_span() {
    let html = "";
    let Token::Comment(comment) = tokenizer(html).next().unwrap() else {
        panic!("expected comment");
    };
    assert_eq!(comment.data, html[comment.data_span()]);
    let labels = vec![(comment.data_span(), "")];
    assert_snapshot!(annotate(html, labels), @r###" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ "###);
}

/// Same as above but for a bogus comment; error tokens emitted alongside it
/// are skipped first.
#[test]
fn comment_bogus_data_span() {
    let html = "";
    // NOTE(review): `.filter(..).next()` is clippy's `filter_next`;
    // `.find(..)` would be the idiomatic equivalent.
    let Token::Comment(comment) = tokenizer(html)
        .filter(|t| !matches!(t, Token::Error { ..
        }))
        .next()
        .unwrap()
    else {
        panic!("expected comment");
    };
    assert_eq!(comment.data, html[comment.data_span()]);
    let labels = vec![(comment.data_span(), "")];
    assert_snapshot!(annotate(html, labels), @r###" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ "###);
}

/// `Doctype::span` should cover the entire doctype declaration.
#[test]
fn doctype_span() {
    let html = r#""#;
    let Token::Doctype(doctype) = tokenizer(html).next().unwrap() else {
        panic!("expected doctype");
    };
    let labels = vec![(doctype.span, "")];
    assert_snapshot!(annotate(html, labels), @r###" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ "###);
}

/// Public and system identifier spans should each cover only their own
/// (unquoted) identifier text.
#[test]
fn doctype_id_spans() {
    let html = r#""#;
    let Token::Doctype(doctype) = tokenizer(html).next().unwrap() else {
        panic!("expected doctype");
    };
    let labels = vec![
        (doctype.public_id_span().unwrap(), "public id"),
        (doctype.system_id_span().unwrap(), "system id"),
    ];
    assert_snapshot!(annotate(html, labels), @r###" ^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ system id │ public id "###);
}

/// Tokenizes `html` and annotates every emitted `Token::Error` with its error
/// code. Sanity-checks each span: empty spans are only legal at EOF (for
/// eof-* errors), and non-empty spans must lie within the source.
fn annotate_errors(html: &'static str) -> String {
    let mut labels = Vec::new();
    for token in tokenizer(html) {
        let Token::Error { error, span } = token else {
            continue;
        };
        if span.start == span.end {
            if span.start != html.len() {
                panic!("empty error spans are only allowed at the very end of the source (for eof errors)");
            }
        } else {
            assert!(span.start < span.end);
            assert!(span.end <= html.len());
        }
        labels.push((span, error.code()));
    }
    annotate(html, labels)
}

/// Meta-test: keeps the `fn error_*` tests below in alphabetical order by
/// scanning this very file via `file!()`. (Relies on each test's `fn` keyword
/// starting a line.)
#[test]
fn tests_for_errors_are_sorted() {
    let source_of_this_file = std::fs::read_to_string(file!()).unwrap();
    let mut error_tests: Vec<_> = source_of_this_file
        .lines()
        .filter(|l| l.starts_with("fn error_"))
        .collect();
    let error_tests_found_order = error_tests.join("\n");
    error_tests.sort();
    let error_tests_sorted = error_tests.join("\n");
    assert_eq!(error_tests_found_order, error_tests_sorted);
}

// One test per tokenizer error code follows; each snapshot shows the offending
// source with the error span annotated by `annotate_errors`.

#[test]
fn error_char_ref_missing_semicolon() {
    let html = "&not";
    assert_snapshot!(annotate_errors(html), @r###" &not 
^ missing-semicolon-after-character-reference "###);
}

#[test]
fn error_char_ref_unknown_named() {
    let html = "The pirate says &arrrrr;";
    assert_snapshot!(annotate_errors(html), @r###" The pirate says &arrrrr; ^ unknown-named-character-reference "###);
}

#[test]
fn error_duplicate_attribute() {
    let html = "Does this open two pages? ";
    assert_snapshot!(annotate_errors(html), @r###" Does this open two pages? ^^^^ duplicate-attribute "###);
}

#[test]
fn error_end_tag_with_attributes() {
    let html = "";
    assert_snapshot!(annotate_errors(html), @r###" ^^^^^^ end-tag-with-attributes "###);
}

#[test]
fn error_end_tag_with_trailing_solidus() {
    let html = "Do you start or do you end? ";
    assert_snapshot!(annotate_errors(html), @r###" Do you start or do you end? ^ end-tag-with-trailing-solidus "###);
}

#[test]
fn error_eof_before_tag_name() {
    let html = "<";
    assert_snapshot!(annotate_errors(html), @r###" < ^ eof-before-tag-name "###);
}

// TODO: add error_eof_in_cdata test
// blocked by lack of proper tree constructor (NaiveParser doesn't parse CDATA sections)

#[test]
fn error_eof_in_comment() {
    let html = "