summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2023-09-03 11:06:25 +0200
committer Martin Fischer <martin@push-f.com> 2023-09-03 23:00:05 +0200
commit 6e6bbcd053c6114a9fa75052b09e701eaa2f3465 (patch)
tree 609a4823541e28a40244ad018ad1bb3a9aeb0124
parent d56686deab81c8b50207b75a485cf26ec8502383 (diff)
feat: add Doctype::name_span
-rw-r--r-- CHANGELOG.md 2
-rw-r--r-- src/emitter.rs 25
-rw-r--r-- src/machine.rs 6
-rw-r--r-- tests/test_spans.rs 10
4 files changed, 40 insertions, 3 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0deb60e..cc62de5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,8 @@
(Since `adjusted_current_node_present_and_not_in_html_namespace` has been removed,
the DefaultEmitter is now spec-compliant and can be exposed in good conscience.)
+* Added `Doctype::name_span`.
+
#### Breaking changes
* Iterating over `Tokenizer` now yields values of a new `Event` enum.
diff --git a/src/emitter.rs b/src/emitter.rs
index 69baec2..ed8e978 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -158,6 +158,11 @@ pub trait Emitter<O> {
/// If the current token is not a doctype, this method may panic.
fn init_doctype_name(&mut self, offset: O) {}
+ /// Called after the last [`push_doctype_name`] call for a DOCTYPE name.
+ ///
+ /// [`push_doctype_name`]: Self::push_doctype_name
+ fn terminate_doctype_name(&mut self, offset: O) {}
+
/// Assuming the _current token_ is a doctype, set its "public identifier" to the empty string.
///
/// If the current token is not a doctype, this method may panic.
@@ -422,6 +427,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
return;
};
doctype.name = Some("".into());
+ doctype.name_span.start = offset;
}
fn push_doctype_name(&mut self, s: &str) {
@@ -433,6 +439,15 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
_ => debug_assert!(false),
}
}
+
+ fn terminate_doctype_name(&mut self, offset: O) {
+ let Some(Token::Doctype(doctype)) = &mut self.current_token else {
+ debug_assert!(false);
+ return;
+ };
+ doctype.name_span.end = offset;
+ }
+
fn init_doctype(&mut self, offset: O) {
self.current_token = Some(Token::Doctype(Doctype {
name: None,
@@ -440,6 +455,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
public_id: None,
system_id: None,
span: offset..O::default(),
+ name_span: O::default()..O::default(),
public_id_span: O::default()..O::default(),
system_id_span: O::default()..O::default(),
}));
@@ -628,6 +644,9 @@ pub struct Doctype<O> {
/// The source code span of the doctype.
pub span: Range<O>,
+ /// The span of the name.
+ name_span: Range<O>,
+
/// The span of the public identifier.
public_id_span: Range<O>,
@@ -636,6 +655,12 @@ pub struct Doctype<O> {
}
impl<O: Offset> Doctype<O> {
+ /// Returns the span of the name.
+ pub fn name_span(&self) -> Option<Range<O>> {
+ self.name.as_ref()?;
+ Some(self.name_span.clone())
+ }
+
/// Returns the span of the public identifier.
pub fn public_id_span(&self) -> Option<Range<O>> {
self.public_id.as_ref()?;
diff --git a/src/machine.rs b/src/machine.rs
index 6c4558c..d175b8b 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1239,10 +1239,14 @@ where
},
State::DoctypeName => match slf.read_char()? {
Some(whitespace_pat!()) => {
+ slf.emitter
+ .terminate_doctype_name(slf.position_before_match);
slf.state = State::AfterDoctypeName;
Ok(ControlToken::Continue)
}
Some('>') => {
+ slf.emitter
+ .terminate_doctype_name(slf.position_before_match);
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
@@ -1254,6 +1258,8 @@ where
}
None => {
slf.emit_error(Error::EofInDoctype);
+ slf.emitter
+ .terminate_doctype_name(slf.position_before_match);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
index 08b5a31..718a176 100644
--- a/tests/test_spans.rs
+++ b/tests/test_spans.rs
@@ -308,6 +308,9 @@ fn doctype_id_spans() {
};
let mut labels = Vec::new();
+ if let Some(name_span) = doctype.name_span() {
+ labels.push((name_span, "name"));
+ }
if let Some(public_id_span) = doctype.public_id_span() {
labels.push((public_id_span, "public id"));
}
@@ -322,9 +325,10 @@ fn doctype_id_spans() {
assert_snapshot!(annotated, @r###"
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
- ^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ system id
- │
- public id
+ ^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ system id
+ │ │
+ │ public id
+ name
"###);
}