diff options
| author | Martin Fischer <martin@push-f.com> | 2023-09-01 19:58:33 +0200 | 
|---|---|---|
| committer | Martin Fischer <martin@push-f.com> | 2023-09-03 23:00:05 +0200 | 
| commit | 822cc78d438e8df6a068d811d3cc825b883aba36 (patch) | |
| tree | 060564fb15d42a911b7e5a6221e6dbe7a4ee5929 /src | |
| parent | fd43f18f20afcead7f3976ac893a4b5c4155b41f (diff) | |
fix: too small char ref error spans
Diffstat (limited to 'src')
| -rw-r--r-- | src/machine.rs | 1 |
| -rw-r--r-- | src/tokenizer.rs | 9 | 
2 files changed, 10 insertions, 0 deletions
```diff
diff --git a/src/machine.rs b/src/machine.rs
index 149b64d..84fb525 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1659,6 +1659,7 @@ where
             }
         },
         State::CharacterReference => {
+            slf.some_offset = slf.reader.position() - "&".len();
             slf.temporary_buffer.clear();
             slf.temporary_buffer.push('&');
             match slf.read_char()? {
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index f41f5ae..cecc0f6 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -50,6 +50,7 @@ pub struct Tokenizer<R: Reader, O, E: Emitter<O>> {
     last_start_tag_name: String,
     is_start_tag: bool,
     /// * Set to the start offset of `<!doctype` in [`InternalState::MarkupDeclarationOpen`].
+    /// * Set to the offset of `&` in [`InternalState::CharacterReference`].
     pub(crate) some_offset: O,
     /// This boolean flag exists so that the [`NaiveParser`](crate::NaiveParser) can work with any [`Emitter`]
     /// (it cannot call [`Tokenizer::set_state`] using the emitted start tags since they can be of an arbitrary type).
@@ -189,6 +190,14 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
             | Error::MissingSemicolonAfterCharacterReference => {
                 self.reader.position()..self.reader.position()
             }
+            Error::AbsenceOfDigitsInNumericCharacterReference
+            | Error::NullCharacterReference
+            | Error::CharacterReferenceOutsideUnicodeRange
+            | Error::SurrogateCharacterReference
+            | Error::NoncharacterCharacterReference
+            | Error::ControlCharacterReference
+            | Error::UnknownNamedCharacterReference => self.some_offset..self.reader.position(),
+
             _ => self.reader.position() - 1..self.reader.position(),
         };
         self.emitter.emit_error(error, span);
```
