diff options
author | Martin Fischer <martin@push-f.com> | 2023-09-01 13:22:51 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-09-03 23:00:05 +0200 |
commit | 55e6341d11aa4a51bb0a45e3719eb44c3706b0da (patch) | |
tree | 93946bbda6910aef722182ced49a045ba0550e13 | |
parent | 6556106154d7e7cbc7820f223b9baaf49a900449 (diff) |
fix: make doctype id spans encoding-independent
-rw-r--r-- | src/emitter.rs | 63 | ||||
-rw-r--r-- | src/machine.rs | 24 | ||||
-rw-r--r-- | tests/test_spans.rs | 2 |
3 files changed, 74 insertions, 15 deletions
diff --git a/src/emitter.rs b/src/emitter.rs index 341d335..9fdf967 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -158,6 +158,11 @@ pub trait Emitter<O> { /// If the current token is not a doctype, this method may panic. fn init_doctype_public_id(&mut self, offset: O); + /// Called after the last [`push_doctype_public_id`] call for a DOCTYPE public identifier. + /// + /// [`push_doctype_public_id`]: Self::push_doctype_public_id + fn terminate_doctype_public_id(&mut self, offset: O) {} + /// Assuming the _current token_ is a doctype, set its "system identifier" to the empty string. /// /// If the current token is not a doctype, this method may panic. @@ -172,6 +177,11 @@ pub trait Emitter<O> { /// /// If the current token is not a doctype, this method may panic. fn push_doctype_system_id(&mut self, s: &str); + + /// Called after the last [`push_doctype_system_id`] call for a DOCTYPE system identifier. + /// + /// [`push_doctype_system_id`]: Self::push_doctype_system_id + fn terminate_doctype_system_id(&mut self, offset: O) {} } /// The default implementation of [`Emitter`], used to produce tokens. @@ -410,8 +420,8 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { public_id: None, system_id: None, span: offset..O::default(), - public_id_offset: O::default(), - system_id_offset: O::default(), + public_id_span: O::default()..O::default(), + system_id_span: O::default()..O::default(), })); } @@ -459,7 +469,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { return; }; doctype.public_id = Some("".to_owned()); - doctype.public_id_offset = offset; + doctype.public_id_span.start = offset; } fn init_doctype_system_id(&mut self, offset: O) { let Some(Token::Doctype(doctype)) = &mut self.current_token else { @@ -467,7 +477,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { return; }; doctype.system_id = Some("".to_owned()); - doctype.system_id_offset = offset; + doctype.system_id_span.start = offset; } fn push_doctype_public_id(&mut self, s: &str) { if let Some(Token::Doctype(Doctype { @@ -480,6 +490,19 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { debug_assert!(false); } } + + fn terminate_doctype_public_id(&mut self, offset: O) { + if let Some(Token::Doctype(Doctype { + ref mut public_id_span, + .. + })) = self.current_token + { + public_id_span.end = offset; + } else { + debug_assert!(false); + } + } + fn push_doctype_system_id(&mut self, s: &str) { if let Some(Token::Doctype(Doctype { system_id: Some(ref mut id), @@ -491,6 +514,18 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { debug_assert!(false); } } + + fn terminate_doctype_system_id(&mut self, offset: O) { + if let Some(Token::Doctype(Doctype { + ref mut system_id_span, + .. + })) = self.current_token + { + system_id_span.end = offset; + } else { + debug_assert!(false); + } + } } /// An HTML start tag, such as `<p>` or `<a>`. @@ -573,24 +608,24 @@ pub struct Doctype<O> { /// The source code span of the doctype. pub span: Range<O>, - /// The source offset of the public identifier. - public_id_offset: O, + /// The span of the public identifier. + public_id_span: Range<O>, - /// The source offset of the system identifier. - system_id_offset: O, + /// The span of the system identifier. + system_id_span: Range<O>, } impl<O: Offset> Doctype<O> { - /// Calculates the span of the public identifier and returns it. + /// Returns the span of the public identifier. pub fn public_id_span(&self) -> Option<Range<O>> { - let public_id = self.public_id.as_ref()?; - Some(self.public_id_offset..self.public_id_offset + public_id.len()) + self.public_id.as_ref()?; + Some(self.public_id_span.clone()) } - /// Calculates the span of the system identifier and returns it. + /// Returns the span of the system identifier. pub fn system_id_span(&self) -> Option<Range<O>> { - let system_id = self.system_id.as_ref()?; - Some(self.system_id_offset..self.system_id_offset + system_id.len()) + self.system_id.as_ref()?; + Some(self.system_id_span.clone()) } } diff --git a/src/machine.rs b/src/machine.rs index 5c5c533..f00af0a 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -1359,6 +1359,8 @@ where }, State::DoctypePublicIdentifierDoubleQuoted => match slf.read_char()? { Some('"') => { + slf.emitter + .terminate_doctype_public_id(slf.position_before_match); slf.state = State::AfterDoctypePublicIdentifier; Ok(ControlToken::Continue) } @@ -1368,6 +1370,8 @@ where Ok(ControlToken::Continue) } Some('>') => { + slf.emitter + .terminate_doctype_public_id(slf.position_before_match); slf.emit_error(Error::AbruptDoctypePublicIdentifier); slf.emitter.set_force_quirks(); slf.state = State::Data; @@ -1375,6 +1379,8 @@ where Ok(ControlToken::Continue) } None => { + slf.emitter + .terminate_doctype_public_id(slf.reader.position()); slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); slf.emitter.emit_current_doctype(slf.reader.position()); @@ -1387,6 +1393,8 @@ where }, State::DoctypePublicIdentifierSingleQuoted => match slf.read_char()? { Some('\'') => { + slf.emitter + .terminate_doctype_public_id(slf.position_before_match); slf.state = State::AfterDoctypePublicIdentifier; Ok(ControlToken::Continue) } @@ -1396,6 +1404,8 @@ where Ok(ControlToken::Continue) } Some('>') => { + slf.emitter + .terminate_doctype_public_id(slf.position_before_match); slf.emit_error(Error::AbruptDoctypePublicIdentifier); slf.emitter.set_force_quirks(); slf.state = State::Data; @@ -1403,6 +1413,8 @@ where Ok(ControlToken::Continue) } None => { + slf.emitter + .terminate_doctype_public_id(slf.reader.position()); slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); slf.emitter.emit_current_doctype(slf.reader.position()); @@ -1553,6 +1565,8 @@ where }, State::DoctypeSystemIdentifierDoubleQuoted => match slf.read_char()? { Some('"') => { + slf.emitter + .terminate_doctype_system_id(slf.position_before_match); slf.state = State::AfterDoctypeSystemIdentifier; Ok(ControlToken::Continue) } @@ -1562,6 +1576,8 @@ where Ok(ControlToken::Continue) } Some('>') => { + slf.emitter + .terminate_doctype_system_id(slf.position_before_match); slf.emit_error(Error::AbruptDoctypeSystemIdentifier); slf.emitter.set_force_quirks(); slf.state = State::Data; @@ -1569,6 +1585,8 @@ where Ok(ControlToken::Continue) } None => { + slf.emitter + .terminate_doctype_system_id(slf.reader.position()); slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); slf.emitter.emit_current_doctype(slf.reader.position()); @@ -1581,6 +1599,8 @@ where }, State::DoctypeSystemIdentifierSingleQuoted => match slf.read_char()? { Some('\'') => { + slf.emitter + .terminate_doctype_system_id(slf.position_before_match); slf.state = State::AfterDoctypeSystemIdentifier; Ok(ControlToken::Continue) } @@ -1590,6 +1610,8 @@ where Ok(ControlToken::Continue) } Some('>') => { + slf.emitter + .terminate_doctype_system_id(slf.position_before_match); slf.emit_error(Error::AbruptDoctypeSystemIdentifier); slf.emitter.set_force_quirks(); slf.state = State::Data; @@ -1597,6 +1619,8 @@ where Ok(ControlToken::Continue) } None => { + slf.emitter + .terminate_doctype_system_id(slf.reader.position()); slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); slf.emitter.emit_current_doctype(slf.reader.position()); diff --git a/tests/test_spans.rs b/tests/test_spans.rs index 74724a5..09f10f4 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -299,7 +299,7 @@ fn doctype_id_spans() { labels }; - assert_panics_but_should_not(|| assert_char_encoding_independence(case, labeler)); // FIXME + assert_char_encoding_independence(case, labeler); annotated.push_str(&test_and_annotate(case, labeler)); } |