From cb4e9cf7cfc1612c67f16bfabadbdf442cd380fe Mon Sep 17 00:00:00 2001 From: Martin Fischer Date: Thu, 17 Aug 2023 16:12:51 +0200 Subject: fix: fix lots of position off-by-ones Previously the PosTrackingReader always mysteriously subtracted 1 from the current position ... this wasn't sound at all ... the machine just happens to often call `Tokenizer::unread_char` ... but not always. E.g. for proper comments it didn't which resulted in their offset and spans being off-by-one, which is fixed by this commit (see test_spans.rs). --- src/emitter.rs | 7 +++---- src/machine.rs | 8 ++++---- src/offset.rs | 2 +- src/tokenizer.rs | 8 ++++---- 4 files changed, 12 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/emitter.rs b/src/emitter.rs index 1f60f70..90aa4db 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -310,7 +310,7 @@ impl Emitter for DefaultEmitter { *self_closing = true; } Token::EndTag(_) => { - self.emit_error(Error::EndTagWithTrailingSolidus, offset); + self.emit_error(Error::EndTagWithTrailingSolidus, offset - 1); } _ => { debug_assert!(false); @@ -378,9 +378,8 @@ impl Emitter for DefaultEmitter { }, )); } - fn init_attribute_value(&mut self, offset: O, quoted: bool) { - self.current_attribute.as_mut().unwrap().1.value_span = - offset + quoted as usize..offset + quoted as usize; + fn init_attribute_value(&mut self, offset: O, _quoted: bool) { + self.current_attribute.as_mut().unwrap().1.value_span = offset..offset; } fn push_attribute_name(&mut self, s: &str) { diff --git a/src/machine.rs b/src/machine.rs index deb3983..a58a754 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -694,7 +694,7 @@ where Ok(ControlToken::Continue) } Some(x) => { - slf.emitter.init_attribute_name(slf.reader.position()); + slf.emitter.init_attribute_name(slf.reader.position() - 1); slf.state = State::AttributeName; slf.unread_char(Some(x)); Ok(ControlToken::Continue) @@ -747,7 +747,7 @@ where Ok(ControlToken::Eof) } Some(x) => { - slf.emitter.init_attribute_name(slf.reader.position()); + slf.emitter.init_attribute_name(slf.reader.position() - 1); slf.state = State::AttributeName; slf.unread_char(Some(x)); Ok(ControlToken::Continue) @@ -775,7 +775,7 @@ where } c => { slf.emitter - .init_attribute_value(slf.reader.position(), false); + .init_attribute_value(slf.reader.position() - 1, false); slf.state = State::AttributeValueUnquoted; slf.unread_char(c); Ok(ControlToken::Continue) @@ -952,7 +952,7 @@ where } c => { slf.emit_error(Error::IncorrectlyOpenedComment); - slf.emitter.init_comment(slf.reader.position()); + slf.emitter.init_comment(slf.reader.position() - 1); slf.state = State::BogusComment; slf.unread_char(c); Ok(ControlToken::Continue) diff --git a/src/offset.rs b/src/offset.rs index 8809366..3152c78 100644 --- a/src/offset.rs +++ b/src/offset.rs @@ -68,7 +68,7 @@ impl PosTrackingReader { impl Position for PosTrackingReader { fn position(&self) -> usize { - self.position - 1 + self.position } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 02a4d62..e8a8908 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -106,7 +106,7 @@ impl From for InternalState { } } -impl, O, E: Emitter> Tokenizer { +impl, O: Offset, E: Emitter> Tokenizer { /// Test-internal function to override internal state. /// /// Only available with the `integration-tests` feature which is not public API. @@ -123,7 +123,7 @@ impl, O, E: Emitter> Tokenizer { /// Just a helper method for the machine. #[inline] pub(crate) fn emit_error(&mut self, error: Error) { - self.emitter.emit_error(error, self.reader.position()); + self.emitter.emit_error(error, self.reader.position() - 1); } /// Assuming the _current token_ is an end tag, return true if all of these hold. Return false otherwise. @@ -140,14 +140,14 @@ impl, O, E: Emitter> Tokenizer { #[inline] pub(crate) fn init_start_tag(&mut self) { - self.emitter.init_start_tag(self.reader.position()); + self.emitter.init_start_tag(self.reader.position() - 1); self.current_tag_name.clear(); self.is_start_tag = true; } #[inline] pub(crate) fn init_end_tag(&mut self) { - self.emitter.init_end_tag(self.reader.position()); + self.emitter.init_end_tag(self.reader.position() - 1); self.current_tag_name.clear(); self.is_start_tag = false; } -- cgit v1.2.3