diff options
author | Martin Fischer <martin@push-f.com> | 2023-09-03 11:06:25 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-09-03 23:00:05 +0200 |
commit | 6e6bbcd053c6114a9fa75052b09e701eaa2f3465 (patch) | |
tree | 609a4823541e28a40244ad018ad1bb3a9aeb0124 | |
parent | d56686deab81c8b50207b75a485cf26ec8502383 (diff) |
feat: add Doctype::name_span
-rw-r--r-- | CHANGELOG.md | 2 | ||||
-rw-r--r-- | src/emitter.rs | 25 | ||||
-rw-r--r-- | src/machine.rs | 6 | ||||
-rw-r--r-- | tests/test_spans.rs | 10 |
4 files changed, 40 insertions, 3 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 0deb60e..cc62de5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ (Since `adjusted_current_node_present_and_not_in_html_namespace` has been removed, the DefaultEmitter is now spec-compliant and can be exposed in good conscience.) +* Added `Doctype::name_span`. + #### Breaking changes * Iterating over `Tokenizer` now yields values of a new `Event` enum. diff --git a/src/emitter.rs b/src/emitter.rs index 69baec2..ed8e978 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -158,6 +158,11 @@ pub trait Emitter<O> { /// If the current token is not a doctype, this method may panic. fn init_doctype_name(&mut self, offset: O) {} + /// Called after the last [`push_doctype_name`] call for a DOCTYPE name. + /// + /// [`push_doctype_name`]: Self::push_doctype_name + fn terminate_doctype_name(&mut self, offset: O) {} + /// Assuming the _current token_ is a doctype, set its "public identifier" to the empty string. /// /// If the current token is not a doctype, this method may panic. @@ -422,6 +427,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { return; }; doctype.name = Some("".into()); + doctype.name_span.start = offset; } fn push_doctype_name(&mut self, s: &str) { @@ -433,6 +439,15 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { _ => debug_assert!(false), } } + + fn terminate_doctype_name(&mut self, offset: O) { + let Some(Token::Doctype(doctype)) = &mut self.current_token else { + debug_assert!(false); + return; + }; + doctype.name_span.end = offset; + } + fn init_doctype(&mut self, offset: O) { self.current_token = Some(Token::Doctype(Doctype { name: None, @@ -440,6 +455,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { public_id: None, system_id: None, span: offset..O::default(), + name_span: O::default()..O::default(), public_id_span: O::default()..O::default(), system_id_span: O::default()..O::default(), })); @@ -628,6 +644,9 @@ pub struct Doctype<O> { /// The source code span of the doctype. pub span: Range<O>, + /// The span of the name. + name_span: Range<O>, + /// The span of the public identifier. public_id_span: Range<O>, @@ -636,6 +655,12 @@ pub struct Doctype<O> { } impl<O: Offset> Doctype<O> { + /// Returns the span of the name. + pub fn name_span(&self) -> Option<Range<O>> { + self.name.as_ref()?; + Some(self.name_span.clone()) + } + /// Returns the span of the public identifier. pub fn public_id_span(&self) -> Option<Range<O>> { self.public_id.as_ref()?; diff --git a/src/machine.rs b/src/machine.rs index 6c4558c..d175b8b 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -1239,10 +1239,14 @@ where }, State::DoctypeName => match slf.read_char()? { Some(whitespace_pat!()) => { + slf.emitter + .terminate_doctype_name(slf.position_before_match); slf.state = State::AfterDoctypeName; Ok(ControlToken::Continue) } Some('>') => { + slf.emitter + .terminate_doctype_name(slf.position_before_match); slf.state = State::Data; slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) @@ -1254,6 +1258,8 @@ where } None => { slf.emit_error(Error::EofInDoctype); + slf.emitter + .terminate_doctype_name(slf.position_before_match); slf.emitter.set_force_quirks(); slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) diff --git a/tests/test_spans.rs b/tests/test_spans.rs index 08b5a31..718a176 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -308,6 +308,9 @@ fn doctype_id_spans() { }; let mut labels = Vec::new(); + if let Some(name_span) = doctype.name_span() { + labels.push((name_span, "name")); + } if let Some(public_id_span) = doctype.public_id_span() { labels.push((public_id_span, "public id")); } @@ -322,9 +325,10 @@ fn doctype_id_spans() { assert_snapshot!(annotated, @r###" <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> - ^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ system id - │ - public id + ^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ system id + │ │ + │ public id + name "###); } |