summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2023-09-01 13:22:51 +0200
committer Martin Fischer <martin@push-f.com> 2023-09-03 23:00:05 +0200
commit 55e6341d11aa4a51bb0a45e3719eb44c3706b0da (patch)
tree 93946bbda6910aef722182ced49a045ba0550e13
parent 6556106154d7e7cbc7820f223b9baaf49a900449 (diff)
fix: make doctype id spans encoding-independent
-rw-r--r-- src/emitter.rs 63
-rw-r--r-- src/machine.rs 24
-rw-r--r-- tests/test_spans.rs 2
3 files changed, 74 insertions, 15 deletions
diff --git a/src/emitter.rs b/src/emitter.rs
index 341d335..9fdf967 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -158,6 +158,11 @@ pub trait Emitter<O> {
/// If the current token is not a doctype, this method may panic.
fn init_doctype_public_id(&mut self, offset: O);
+ /// Called after the last [`push_doctype_public_id`] call for a DOCTYPE public identifier.
+ ///
+ /// [`push_doctype_public_id`]: Self::push_doctype_public_id
+ fn terminate_doctype_public_id(&mut self, offset: O) {}
+
/// Assuming the _current token_ is a doctype, set its "system identifier" to the empty string.
///
/// If the current token is not a doctype, this method may panic.
@@ -172,6 +177,11 @@ pub trait Emitter<O> {
///
/// If the current token is not a doctype, this method may panic.
fn push_doctype_system_id(&mut self, s: &str);
+
+ /// Called after the last [`push_doctype_system_id`] call for a DOCTYPE system identifier.
+ ///
+ /// [`push_doctype_system_id`]: Self::push_doctype_system_id
+ fn terminate_doctype_system_id(&mut self, offset: O) {}
}
/// The default implementation of [`Emitter`], used to produce tokens.
@@ -410,8 +420,8 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
public_id: None,
system_id: None,
span: offset..O::default(),
- public_id_offset: O::default(),
- system_id_offset: O::default(),
+ public_id_span: O::default()..O::default(),
+ system_id_span: O::default()..O::default(),
}));
}
@@ -459,7 +469,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
return;
};
doctype.public_id = Some("".to_owned());
- doctype.public_id_offset = offset;
+ doctype.public_id_span.start = offset;
}
fn init_doctype_system_id(&mut self, offset: O) {
let Some(Token::Doctype(doctype)) = &mut self.current_token else {
@@ -467,7 +477,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
return;
};
doctype.system_id = Some("".to_owned());
- doctype.system_id_offset = offset;
+ doctype.system_id_span.start = offset;
}
fn push_doctype_public_id(&mut self, s: &str) {
if let Some(Token::Doctype(Doctype {
@@ -480,6 +490,19 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
debug_assert!(false);
}
}
+
+ fn terminate_doctype_public_id(&mut self, offset: O) {
+ if let Some(Token::Doctype(Doctype {
+ ref mut public_id_span,
+ ..
+ })) = self.current_token
+ {
+ public_id_span.end = offset;
+ } else {
+ debug_assert!(false);
+ }
+ }
+
fn push_doctype_system_id(&mut self, s: &str) {
if let Some(Token::Doctype(Doctype {
system_id: Some(ref mut id),
@@ -491,6 +514,18 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
debug_assert!(false);
}
}
+
+ fn terminate_doctype_system_id(&mut self, offset: O) {
+ if let Some(Token::Doctype(Doctype {
+ ref mut system_id_span,
+ ..
+ })) = self.current_token
+ {
+ system_id_span.end = offset;
+ } else {
+ debug_assert!(false);
+ }
+ }
}
/// An HTML start tag, such as `<p>` or `<a>`.
@@ -573,24 +608,24 @@ pub struct Doctype<O> {
/// The source code span of the doctype.
pub span: Range<O>,
- /// The source offset of the public identifier.
- public_id_offset: O,
+ /// The span of the public identifier.
+ public_id_span: Range<O>,
- /// The source offset of the system identifier.
- system_id_offset: O,
+ /// The span of the system identifier.
+ system_id_span: Range<O>,
}
impl<O: Offset> Doctype<O> {
- /// Calculates the span of the public identifier and returns it.
+ /// Returns the span of the public identifier.
pub fn public_id_span(&self) -> Option<Range<O>> {
- let public_id = self.public_id.as_ref()?;
- Some(self.public_id_offset..self.public_id_offset + public_id.len())
+ self.public_id.as_ref()?;
+ Some(self.public_id_span.clone())
}
- /// Calculates the span of the system identifier and returns it.
+ /// Returns the span of the system identifier.
pub fn system_id_span(&self) -> Option<Range<O>> {
- let system_id = self.system_id.as_ref()?;
- Some(self.system_id_offset..self.system_id_offset + system_id.len())
+ self.system_id.as_ref()?;
+ Some(self.system_id_span.clone())
}
}
diff --git a/src/machine.rs b/src/machine.rs
index 5c5c533..f00af0a 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1359,6 +1359,8 @@ where
},
State::DoctypePublicIdentifierDoubleQuoted => match slf.read_char()? {
Some('"') => {
+ slf.emitter
+ .terminate_doctype_public_id(slf.position_before_match);
slf.state = State::AfterDoctypePublicIdentifier;
Ok(ControlToken::Continue)
}
@@ -1368,6 +1370,8 @@ where
Ok(ControlToken::Continue)
}
Some('>') => {
+ slf.emitter
+ .terminate_doctype_public_id(slf.position_before_match);
slf.emit_error(Error::AbruptDoctypePublicIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
@@ -1375,6 +1379,8 @@ where
Ok(ControlToken::Continue)
}
None => {
+ slf.emitter
+ .terminate_doctype_public_id(slf.reader.position());
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
@@ -1387,6 +1393,8 @@ where
},
State::DoctypePublicIdentifierSingleQuoted => match slf.read_char()? {
Some('\'') => {
+ slf.emitter
+ .terminate_doctype_public_id(slf.position_before_match);
slf.state = State::AfterDoctypePublicIdentifier;
Ok(ControlToken::Continue)
}
@@ -1396,6 +1404,8 @@ where
Ok(ControlToken::Continue)
}
Some('>') => {
+ slf.emitter
+ .terminate_doctype_public_id(slf.position_before_match);
slf.emit_error(Error::AbruptDoctypePublicIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
@@ -1403,6 +1413,8 @@ where
Ok(ControlToken::Continue)
}
None => {
+ slf.emitter
+ .terminate_doctype_public_id(slf.reader.position());
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
@@ -1553,6 +1565,8 @@ where
},
State::DoctypeSystemIdentifierDoubleQuoted => match slf.read_char()? {
Some('"') => {
+ slf.emitter
+ .terminate_doctype_system_id(slf.position_before_match);
slf.state = State::AfterDoctypeSystemIdentifier;
Ok(ControlToken::Continue)
}
@@ -1562,6 +1576,8 @@ where
Ok(ControlToken::Continue)
}
Some('>') => {
+ slf.emitter
+ .terminate_doctype_system_id(slf.position_before_match);
slf.emit_error(Error::AbruptDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
@@ -1569,6 +1585,8 @@ where
Ok(ControlToken::Continue)
}
None => {
+ slf.emitter
+ .terminate_doctype_system_id(slf.reader.position());
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
@@ -1581,6 +1599,8 @@ where
},
State::DoctypeSystemIdentifierSingleQuoted => match slf.read_char()? {
Some('\'') => {
+ slf.emitter
+ .terminate_doctype_system_id(slf.position_before_match);
slf.state = State::AfterDoctypeSystemIdentifier;
Ok(ControlToken::Continue)
}
@@ -1590,6 +1610,8 @@ where
Ok(ControlToken::Continue)
}
Some('>') => {
+ slf.emitter
+ .terminate_doctype_system_id(slf.position_before_match);
slf.emit_error(Error::AbruptDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
@@ -1597,6 +1619,8 @@ where
Ok(ControlToken::Continue)
}
None => {
+ slf.emitter
+ .terminate_doctype_system_id(slf.reader.position());
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
index 74724a5..09f10f4 100644
--- a/tests/test_spans.rs
+++ b/tests/test_spans.rs
@@ -299,7 +299,7 @@ fn doctype_id_spans() {
labels
};
- assert_panics_but_should_not(|| assert_char_encoding_independence(case, labeler)); // FIXME
+ assert_char_encoding_independence(case, labeler);
annotated.push_str(&test_and_annotate(case, labeler));
}