summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/machine.rs | 1
-rw-r--r-- src/tokenizer.rs | 9
2 files changed, 10 insertions, 0 deletions
diff --git a/src/machine.rs b/src/machine.rs
index 149b64d..84fb525 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1659,6 +1659,7 @@ where
}
},
State::CharacterReference => {
+ slf.some_offset = slf.reader.position() - "&".len();
slf.temporary_buffer.clear();
slf.temporary_buffer.push('&');
match slf.read_char()? {
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index f41f5ae..cecc0f6 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -50,6 +50,7 @@ pub struct Tokenizer<R: Reader, O, E: Emitter<O>> {
last_start_tag_name: String,
is_start_tag: bool,
/// * Set to the start offset of `<!doctype` in [`InternalState::MarkupDeclarationOpen`].
+ /// * Set to the offset of `&` in [`InternalState::CharacterReference`].
pub(crate) some_offset: O,
/// This boolean flag exists so that the [`NaiveParser`](crate::NaiveParser) can work with any [`Emitter`]
/// (it cannot call [`Tokenizer::set_state`] using the emitted start tags since they can be of an arbitrary type).
@@ -189,6 +190,14 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
| Error::MissingSemicolonAfterCharacterReference => {
self.reader.position()..self.reader.position()
}
+ Error::AbsenceOfDigitsInNumericCharacterReference
+ | Error::NullCharacterReference
+ | Error::CharacterReferenceOutsideUnicodeRange
+ | Error::SurrogateCharacterReference
+ | Error::NoncharacterCharacterReference
+ | Error::ControlCharacterReference
+ | Error::UnknownNamedCharacterReference => self.some_offset..self.reader.position(),
+
_ => self.reader.position() - 1..self.reader.position(),
};
self.emitter.emit_error(error, span);