diff options
| -rw-r--r-- | CHANGELOG.md | 2 | ||||
| -rw-r--r-- | src/machine.rs | 1 | ||||
| -rw-r--r-- | src/tokenizer.rs | 9 | ||||
| -rw-r--r-- | tests/test_spans.rs | 14 | 
4 files changed, 19 insertions, 7 deletions
| diff --git a/CHANGELOG.md b/CHANGELOG.md index 61d8138..474460f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,8 @@    (`eof-*`, `end-tag-with-trailing-solidus`,     `missing-semicolon-after-character-reference`). +* Fixed most error spans about character references being too small. +  ### 0.5.0 - 2023-08-19  #### Features diff --git a/src/machine.rs b/src/machine.rs index 149b64d..84fb525 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -1659,6 +1659,7 @@ where              }          },          State::CharacterReference => { +            slf.some_offset = slf.reader.position() - "&".len();              slf.temporary_buffer.clear();              slf.temporary_buffer.push('&');              match slf.read_char()? { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f41f5ae..cecc0f6 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -50,6 +50,7 @@ pub struct Tokenizer<R: Reader, O, E: Emitter<O>> {      last_start_tag_name: String,      is_start_tag: bool,      /// * Set to the start offset of `<!doctype` in [`InternalState::MarkupDeclarationOpen`]. +    /// * Set to the offset of `&` in [`InternalState::CharacterReference`].      pub(crate) some_offset: O,      /// This boolean flag exists so that the [`NaiveParser`](crate::NaiveParser) can work with any [`Emitter`]      /// (it cannot call [`Tokenizer::set_state`] using the emitted start tags since they can be of an arbitrary type). 
@@ -189,6 +190,14 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {              | Error::MissingSemicolonAfterCharacterReference => {                  self.reader.position()..self.reader.position()              } +            Error::AbsenceOfDigitsInNumericCharacterReference +            | Error::NullCharacterReference +            | Error::CharacterReferenceOutsideUnicodeRange +            | Error::SurrogateCharacterReference +            | Error::NoncharacterCharacterReference +            | Error::ControlCharacterReference +            | Error::UnknownNamedCharacterReference => self.some_offset..self.reader.position(), +              _ => self.reader.position() - 1..self.reader.position(),          };          self.emitter.emit_error(error, span); diff --git a/tests/test_spans.rs b/tests/test_spans.rs index f22a2aa..7eb7b8a 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -249,7 +249,7 @@ fn error_char_ref_absence_of_digits() {      let html = "&#qux;";      assert_snapshot!(annotate_errors(html), @r###"      &#qux; -      ^ absence-of-digits-in-numeric-character-reference +    ^^^ absence-of-digits-in-numeric-character-reference      "###);  } @@ -258,7 +258,7 @@ fn error_char_ref_control_char() {      let html = "&#127;";      assert_snapshot!(annotate_errors(html), @r###"      &#127; -         ^ control-character-reference +    ^^^^^^ control-character-reference      "###);  } @@ -276,7 +276,7 @@ fn error_char_ref_noncharacter() {      let html = "&#xFDD0;";      assert_snapshot!(annotate_errors(html), @r###"      &#xFDD0; -           ^ noncharacter-character-reference +    ^^^^^^^^ noncharacter-character-reference      "###);  } @@ -285,7 +285,7 @@ fn error_char_ref_null_char() {      let html = "&#0;";      assert_snapshot!(annotate_errors(html), @r###"      &#0; -       ^ null-character-reference +    ^^^^ null-character-reference      "###);  } @@ -294,7 +294,7 @@ fn error_char_ref_outside_unicode_range() {      let html = "&#9999999;";      
assert_snapshot!(annotate_errors(html), @r###"      &#9999999; -             ^ character-reference-outside-unicode-range +    ^^^^^^^^^^ character-reference-outside-unicode-range      "###);  } @@ -303,7 +303,7 @@ fn error_char_ref_surrogate() {      let html = "&#xD800;";      assert_snapshot!(annotate_errors(html), @r###"      &#xD800; -           ^ surrogate-character-reference +    ^^^^^^^^ surrogate-character-reference      "###);  } @@ -312,7 +312,7 @@ fn error_char_ref_unknown_named() {      let html = "The pirate says &arrrrr;";      assert_snapshot!(annotate_errors(html), @r###"      The pirate says &arrrrr; -                           ^ unknown-named-character-reference +                    ^^^^^^^^ unknown-named-character-reference      "###);  } | 
