From 822cc78d438e8df6a068d811d3cc825b883aba36 Mon Sep 17 00:00:00 2001 From: Martin Fischer Date: Fri, 1 Sep 2023 19:58:33 +0200 Subject: fix: too small char ref error spans --- CHANGELOG.md | 2 ++ src/machine.rs | 1 + src/tokenizer.rs | 9 +++++++++ tests/test_spans.rs | 14 +++++++------- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61d8138..474460f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,8 @@ (`eof-*`, `end-tag-with-trailing-solidus`, `missing-semicolon-after-character-reference`). +* Fixed most error spans about character references being too small. + ### 0.5.0 - 2023-08-19 #### Features diff --git a/src/machine.rs b/src/machine.rs index 149b64d..84fb525 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -1659,6 +1659,7 @@ where } }, State::CharacterReference => { + slf.some_offset = slf.reader.position() - "&".len(); slf.temporary_buffer.clear(); slf.temporary_buffer.push('&'); match slf.read_char()? { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f41f5ae..cecc0f6 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -50,6 +50,7 @@ pub struct Tokenizer> { last_start_tag_name: String, is_start_tag: bool, /// * Set to the start offset of `, O: Offset, E: Emitter> Tokenizer { | Error::MissingSemicolonAfterCharacterReference => { self.reader.position()..self.reader.position() } + Error::AbsenceOfDigitsInNumericCharacterReference + | Error::NullCharacterReference + | Error::CharacterReferenceOutsideUnicodeRange + | Error::SurrogateCharacterReference + | Error::NoncharacterCharacterReference + | Error::ControlCharacterReference + | Error::UnknownNamedCharacterReference => self.some_offset..self.reader.position(), + _ => self.reader.position() - 1..self.reader.position(), }; self.emitter.emit_error(error, span); diff --git a/tests/test_spans.rs b/tests/test_spans.rs index f22a2aa..7eb7b8a 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -249,7 +249,7 @@ fn error_char_ref_absence_of_digits() { let html = "&#qux;"; assert_snapshot!(annotate_errors(html), @r###" &#qux; - ^ absence-of-digits-in-numeric-character-reference + ^^^ absence-of-digits-in-numeric-character-reference "###); } @@ -258,7 +258,7 @@ fn error_char_ref_control_char() { let html = ""; assert_snapshot!(annotate_errors(html), @r###"  - ^ control-character-reference + ^^^^^^ control-character-reference "###); } @@ -276,7 +276,7 @@ fn error_char_ref_noncharacter() { let html = "﷐"; assert_snapshot!(annotate_errors(html), @r###" ﷐ - ^ noncharacter-character-reference + ^^^^^^^^ noncharacter-character-reference "###); } @@ -285,7 +285,7 @@ fn error_char_ref_null_char() { let html = "�"; assert_snapshot!(annotate_errors(html), @r###" � - ^ null-character-reference + ^^^^ null-character-reference "###); } @@ -294,7 +294,7 @@ fn error_char_ref_outside_unicode_range() { let html = "�"; assert_snapshot!(annotate_errors(html), @r###" � - ^ character-reference-outside-unicode-range + ^^^^^^^^^^ character-reference-outside-unicode-range "###); } @@ -303,7 +303,7 @@ fn error_char_ref_surrogate() { let html = "�"; assert_snapshot!(annotate_errors(html), @r###" � - ^ surrogate-character-reference + ^^^^^^^^ surrogate-character-reference "###); } @@ -312,7 +312,7 @@ fn error_char_ref_unknown_named() { let html = "The pirate says &arrrrr;"; assert_snapshot!(annotate_errors(html), @r###" The pirate says &arrrrr; - ^ unknown-named-character-reference + ^^^^^^^^ unknown-named-character-reference "###); } -- cgit v1.2.3