diff options
| -rw-r--r-- | Cargo.toml | 2 | ||||
| -rw-r--r-- | tests/span-tests/demo.html | 15 | ||||
| -rw-r--r-- | tests/span-tests/demo.out | 29 | ||||
| -rw-r--r-- | tests/test_spans.rs | 200 | 
4 files changed, 153 insertions, 93 deletions
| @@ -16,6 +16,8 @@ include = ["src/**/*", "LICENSE", "README.md"]  [dev-dependencies]  codespan-reporting = "0.11.1" +insta = "1.31.0" +pretty_assertions = "1.0.0"  [features]  # Feature used by integration tests in tests/ to get access to library internals. diff --git a/tests/span-tests/demo.html b/tests/span-tests/demo.html deleted file mode 100644 index 07c305b..0000000 --- a/tests/span-tests/demo.html +++ /dev/null @@ -1,15 +0,0 @@ -this is a tag: <h1>test</h1> - -tags can have attributes: <div id = foobar> - -Attribute values can be quoted: <input name = 'age' type = "number"> - -But you cannot put attributes everywhere: </nope data=foobar> - -Please mind the gap: < test - -The pirate says &arrrrr; - -Does this open two pages? <a href=foo.html href=bar.html>click me</a> - -Do you start or do you end? </yes/> diff --git a/tests/span-tests/demo.out b/tests/span-tests/demo.out deleted file mode 100644 index 22513f7..0000000 --- a/tests/span-tests/demo.out +++ /dev/null @@ -1,29 +0,0 @@ -note: -   ┌─ test.html:1:17 -   │ - 1 │ this is a tag: <h1>test</h1> -   │                 ^^       ^^ end tag -   │                 │ -   │                 start tag -   · - 5 │ Attribute values can be quoted: <input name = 'age' type = "number"> -   │                                        ^^^^    ^^^  ^^^^    ^^^^^^ attr value -   │                                        │       │    │ -   │                                        │       │    attr name -   │                                        │       attr value -   │                                        attr name - 6 │ - 7 │ But you cannot put attributes everywhere: </nope data=foobar> -   │                                                  ^^^^ end-tag-with-attributes - 8 │ - 9 │ Please mind the gap: < test -   │                       ^ invalid-first-character-of-tag-name -10 │ -11 │ The pirate says &arrrrr; -   │                        ^ unknown-named-character-reference -12 │ -13 │ Does this open two pages? <a href=foo.html href=bar.html>click me</a> -   │                                            ^^^^ duplicate-attribute -14 │ -15 │ Do you start or do you end? </yes/> -   │                                   ^ end-tag-with-trailing-solidus diff --git a/tests/test_spans.rs b/tests/test_spans.rs index 8a820de..f8e54a2 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -1,4 +1,4 @@ -use std::{include_str, ops::Range}; +use std::ops::Range;  use codespan_reporting::{      self, @@ -7,66 +7,168 @@ use codespan_reporting::{      term::{self, termcolor::Buffer},  };  use html5tokenizer::{spans::PosTrackingReader, DefaultEmitter, Token, Tokenizer}; +use insta::assert_snapshot; +use pretty_assertions::assert_eq; -#[test] -fn test() { -    let html = include_str!("span-tests/demo.html"); - -    let mut files = SimpleFiles::new(); -    let file_id = files.add("test.html", html); -    let mut labels = Vec::new(); - -    for token in Tokenizer::new( +fn tokenizer(html: &'static str) -> impl Iterator<Item = Token<Range<usize>>> { +    Tokenizer::new(          PosTrackingReader::new(html),          DefaultEmitter::<_, Range<usize>>::default(),      )      .flatten() -    { -        if let Token::StartTag(tag) = token { -            if tag.name == "h1" { -                labels.push(Label::primary(file_id, tag.name_span).with_message("start tag")); -            } else if tag.name == "input" { -                for attr in tag.attributes.values() { -                    labels.push( -                        Label::primary(file_id, attr.name_span.clone()).with_message("attr name"), -                    ); -                    labels.push( -                        Label::primary(file_id, attr.value_span.clone()).with_message("attr value"), -                    ); -                } -            } -        } else if let Token::EndTag(tag) = token { -            if tag.name == "h1" { -                labels.push(Label::primary(file_id, tag.name_span).with_message("end tag")); -            } -        } else if let Token::Error { error, span } = token { -            labels.push(Label::primary(file_id, span).with_message(error.to_string())); -        } -    } +} -    let diagnostic = Diagnostic::note().with_labels(labels); +fn annotate(html: &str, labels: Vec<(Range<usize>, impl AsRef<str>)>) -> String { +    let mut files = SimpleFiles::new(); +    let file_id = files.add("test.html", html); + +    let diagnostic = Diagnostic::note().with_labels( +        labels +            .into_iter() +            .map(|(span, text)| Label::primary(file_id, span).with_message(text.as_ref())) +            .collect(), +    );      let mut writer = Buffer::no_color();      let config = codespan_reporting::term::Config::default();      term::emit(&mut writer, &config, &files, &diagnostic).unwrap(); +    let msg = std::str::from_utf8(writer.as_slice()).unwrap(); + +    // strip the filename and the line numbers since we don't need them +    // (apparently they cannot be disabled in codespan_reporting) +    msg.lines() +        .skip(3) +        .flat_map(|l| l.split_once("│ ").map(|s| s.1.trim_end())) +        .collect::<Vec<_>>() +        .join("\n") +} -    let actual = remove_trailing_spaces(std::str::from_utf8(writer.as_slice()).unwrap()); -    let expected = include_str!("span-tests/demo.out"); - -    if actual != expected { -        println!( -            "EXPECTED:\n{banner}\n{expected}{banner}\n\nACTUAL OUTPUT:\n{banner}\n{actual}{banner}", -            banner = "-".repeat(30), -            expected = expected, -            actual = actual -        ); -        panic!("failed"); +#[test] +fn start_tag_name_span() { +    let html = "<x> <xyz> <xyz  > <xyz/>"; +    let mut labels = Vec::new(); +    for token in tokenizer(html) { +        if let Token::StartTag(tag) = token { +            labels.push((tag.name_span, "")); +        }      } +    assert_snapshot!(annotate(html, labels), @r###" +    <x> <xyz> <xyz  > <xyz/> +     ^   ^^^   ^^^     ^^^ +    "###);  } -fn remove_trailing_spaces(text: &str) -> String { -    text.lines() -        .map(|l| l.trim_end()) -        .collect::<Vec<_>>() -        .join("\n") +#[test] +fn end_tag_name_span() { +    let html = "</x> </xyz> </xyz  > </xyz/>"; +    let mut labels = Vec::new(); +    for token in tokenizer(html) { +        if let Token::EndTag(tag) = token { +            labels.push((tag.name_span, "")); +        } +    } +    assert_snapshot!(annotate(html, labels), @r###" +    </x> </xyz> </xyz  > </xyz/> +      ^    ^^^    ^^^      ^^^ +    "###); +} + +#[test] +fn attribute_name_span() { +    let html = "<test x xyz y=VAL xy=VAL z = VAL yzx = VAL>"; +    let mut labels = Vec::new(); +    let Token::StartTag(tag) = tokenizer(html).next().unwrap() else { +        panic!("expected start tag") +    }; +    for (_name, attr) in tag.attributes { +        labels.push((attr.name_span, "")); +    } +    assert_snapshot!(annotate(html, labels), @r###" +    <test x xyz y=VAL xy=VAL z = VAL yzx = VAL> +          ^ ^^^ ^     ^^     ^       ^^^ +    "###); +} + +#[test] +fn attribute_value_span() { +    let html = "<test x=unquoted y = unquoted z='single-quoted' zz=\"double-quoted\" empty=''>"; +    let mut labels = Vec::new(); +    let Token::StartTag(tag) = tokenizer(html).next().unwrap() else { +        panic!("expected start tag") +    }; +    for (_name, attr) in tag.attributes { +        labels.push((attr.value_span, "")); +    } +    assert_snapshot!(annotate(html, labels), @r###" +    <test x=unquoted y = unquoted z='single-quoted' zz="double-quoted" empty=''> +            ^^^^^^^^     ^^^^^^^^    ^^^^^^^^^^^^^      ^^^^^^^^^^^^^         ^ +    "###); +} + +fn annotate_errors(html: &'static str) -> String { +    let mut labels = Vec::new(); +    for token in tokenizer(html) { +        if let Token::Error { error, span } = token { +            labels.push((span, error.to_string())); +        } +    } +    annotate(html, labels) +} + +#[test] +fn tests_for_errors_are_sorted() { +    let source_of_this_file = std::fs::read_to_string(file!()).unwrap(); +    let mut error_tests: Vec<_> = source_of_this_file +        .lines() +        .filter(|l| l.starts_with("fn error_")) +        .collect(); +    let error_tests_found_order = error_tests.join("\n"); +    error_tests.sort(); +    let error_tests_sorted = error_tests.join("\n"); +    assert_eq!(error_tests_found_order, error_tests_sorted); +} + +#[test] +fn error_duplicate_attribute() { +    let html = "Does this open two pages? <a href=foo.html href=bar.html>"; +    assert_snapshot!(annotate_errors(html), @r###" +    Does this open two pages? <a href=foo.html href=bar.html> +                                               ^^^^ duplicate-attribute +    "###); +} + +#[test] +fn error_end_tag_with_attributes() { +    let html = "</end-tag first second=value>"; +    assert_snapshot!(annotate_errors(html), @r###" +    </end-tag first second=value> +                    ^^^^^^ end-tag-with-attributes +    "###); +} + +#[test] +fn error_end_tag_with_trailing_solidus() { +    let html = "Do you start or do you end? </yes/>"; +    assert_snapshot!(annotate_errors(html), @r###" +    Do you start or do you end? </yes/> +                                      ^ end-tag-with-trailing-solidus +    "###); +} + +#[test] +fn error_invalid_first_character_of_tag_name() { +    let html = "Please mind the gap: < test"; +    assert_snapshot!(annotate_errors(html), @r###" +    Please mind the gap: < test +                          ^ invalid-first-character-of-tag-name +    "###); +} + +#[test] +fn error_unknown_named_character_reference() { +    let html = "The pirate says &arrrrr;"; +    assert_snapshot!(annotate_errors(html), @r###" +    The pirate says &arrrrr; +                           ^ unknown-named-character-reference +    "###);  } | 
