4 files changed, 19 insertions, 7 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 61d8138..474460f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -45,6 +45,8 @@
   (`eof-*`, `end-tag-with-trailing-solidus`,
    `missing-semicolon-after-character-reference`).
 
+* Fixed the spans of most character reference errors being too small.
+
 ### 0.5.0 - 2023-08-19
 
 #### Features
diff --git a/src/machine.rs b/src/machine.rs
index 149b64d..84fb525 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1659,6 +1659,7 @@ where
             }
         },
         State::CharacterReference => {
+            slf.some_offset = slf.reader.position() - "&".len();
             slf.temporary_buffer.clear();
             slf.temporary_buffer.push('&');
             match slf.read_char()? {
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index f41f5ae..cecc0f6 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -50,6 +50,7 @@ pub struct Tokenizer<R: Reader, O, E: Emitter<O>> {
     last_start_tag_name: String,
     is_start_tag: bool,
     /// * Set to the start offset of `<!doctype` in [`InternalState::MarkupDeclarationOpen`].
+    /// * Set to the offset of `&` in [`InternalState::CharacterReference`].
     pub(crate) some_offset: O,
     /// This boolean flag exists so that the [`NaiveParser`](crate::NaiveParser) can work with any [`Emitter`]
     /// (it cannot call [`Tokenizer::set_state`] using the emitted start tags since they can be of an arbitrary type).
@@ -189,6 +190,14 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
             | Error::MissingSemicolonAfterCharacterReference => {
                 self.reader.position()..self.reader.position()
             }
+            Error::AbsenceOfDigitsInNumericCharacterReference
+            | Error::NullCharacterReference
+            | Error::CharacterReferenceOutsideUnicodeRange
+            | Error::SurrogateCharacterReference
+            | Error::NoncharacterCharacterReference
+            | Error::ControlCharacterReference
+            | Error::UnknownNamedCharacterReference => self.some_offset..self.reader.position(),
+
             _ => self.reader.position() - 1..self.reader.position(),
         };
         self.emitter.emit_error(error, span);
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
index f22a2aa..7eb7b8a 100644
--- a/tests/test_spans.rs
+++ b/tests/test_spans.rs
@@ -249,7 +249,7 @@ fn error_char_ref_absence_of_digits() {
     let html = "&#qux;";
     assert_snapshot!(annotate_errors(html), @r###"
     &#qux;
-      ^ absence-of-digits-in-numeric-character-reference
+    ^^^ absence-of-digits-in-numeric-character-reference
     "###);
 }
 
@@ -258,7 +258,7 @@ fn error_char_ref_control_char() {
     let html = "&#127;";
     assert_snapshot!(annotate_errors(html), @r###"
     &#127;
-         ^ control-character-reference
+    ^^^^^^ control-character-reference
     "###);
 }
 
@@ -276,7 +276,7 @@ fn error_char_ref_noncharacter() {
     let html = "&#xFDD0;";
     assert_snapshot!(annotate_errors(html), @r###"
     &#xFDD0;
-           ^ noncharacter-character-reference
+    ^^^^^^^^ noncharacter-character-reference
     "###);
 }
 
@@ -285,7 +285,7 @@ fn error_char_ref_null_char() {
     let html = "&#0;";
     assert_snapshot!(annotate_errors(html), @r###"
     &#0;
-       ^ null-character-reference
+    ^^^^ null-character-reference
     "###);
 }
 
@@ -294,7 +294,7 @@ fn error_char_ref_outside_unicode_range() {
     let html = "&#9999999;";
     assert_snapshot!(annotate_errors(html), @r###"
     &#9999999;
-             ^ character-reference-outside-unicode-range
+    ^^^^^^^^^^ character-reference-outside-unicode-range
     "###);
 }
 
@@ -303,7 +303,7 @@ fn error_char_ref_surrogate() {
     let html = "&#xD800;";
     assert_snapshot!(annotate_errors(html), @r###"
     &#xD800;
-           ^ surrogate-character-reference
+    ^^^^^^^^ surrogate-character-reference
     "###);
 }
 
@@ -312,7 +312,7 @@ fn error_char_ref_unknown_named() {
     let html = "The pirate says &arrrrr;";
     assert_snapshot!(annotate_errors(html), @r###"
     The pirate says &arrrrr;
-                           ^ unknown-named-character-reference
+                    ^^^^^^^^ unknown-named-character-reference
     "###);
 }