diff options
| author | Martin Fischer <martin@push-f.com> | 2023-09-03 11:06:25 +0200 | 
|---|---|---|
| committer | Martin Fischer <martin@push-f.com> | 2023-09-03 23:00:05 +0200 | 
| commit | 6e6bbcd053c6114a9fa75052b09e701eaa2f3465 (patch) | |
| tree | 609a4823541e28a40244ad018ad1bb3a9aeb0124 | |
| parent | d56686deab81c8b50207b75a485cf26ec8502383 (diff) | |
feat: add Doctype::name_span
| -rw-r--r-- | CHANGELOG.md | 2 | ||||
| -rw-r--r-- | src/emitter.rs | 25 | ||||
| -rw-r--r-- | src/machine.rs | 6 | ||||
| -rw-r--r-- | tests/test_spans.rs | 10 | 
4 files changed, 40 insertions, 3 deletions
| diff --git a/CHANGELOG.md b/CHANGELOG.md index 0deb60e..cc62de5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@    (Since `adjusted_current_node_present_and_not_in_html_namespace` has been removed,    the DefaultEmitter is now spec-compliant and can be exposed in good conscience.) +* Added `Doctype::name_span`. +  #### Breaking changes  * Iterating over `Tokenizer` now yields values of a new `Event` enum. diff --git a/src/emitter.rs b/src/emitter.rs index 69baec2..ed8e978 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -158,6 +158,11 @@ pub trait Emitter<O> {      /// If the current token is not a doctype, this method may panic.      fn init_doctype_name(&mut self, offset: O) {} +    /// Called after the last [`push_doctype_name`] call for a DOCTYPE name. +    /// +    /// [`push_doctype_name`]: Self::push_doctype_name +    fn terminate_doctype_name(&mut self, offset: O) {} +      /// Assuming the _current token_ is a doctype, set its "public identifier" to the empty string.      ///      /// If the current token is not a doctype, this method may panic. @@ -422,6 +427,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {              return;          };          doctype.name = Some("".into()); +        doctype.name_span.start = offset;      }      fn push_doctype_name(&mut self, s: &str) { @@ -433,6 +439,15 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {              _ => debug_assert!(false),          }      } + +    fn terminate_doctype_name(&mut self, offset: O) { +        let Some(Token::Doctype(doctype)) = &mut self.current_token else { +            debug_assert!(false); +            return; +        }; +        doctype.name_span.end = offset; +    } +      fn init_doctype(&mut self, offset: O) {          self.current_token = Some(Token::Doctype(Doctype {              name: None, @@ -440,6 +455,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {              public_id: None,              system_id: None,              span: offset..O::default(), +            name_span: O::default()..O::default(),              public_id_span: O::default()..O::default(),              system_id_span: O::default()..O::default(),          })); @@ -628,6 +644,9 @@ pub struct Doctype<O> {      /// The source code span of the doctype.      pub span: Range<O>, +    /// The span of the name. +    name_span: Range<O>, +      /// The span of the public identifier.      public_id_span: Range<O>, @@ -636,6 +655,12 @@ pub struct Doctype<O> {  }  impl<O: Offset> Doctype<O> { +    /// Returns the span of the name. +    pub fn name_span(&self) -> Option<Range<O>> { +        self.name.as_ref()?; +        Some(self.name_span.clone()) +    } +      /// Returns the span of the public identifier.      pub fn public_id_span(&self) -> Option<Range<O>> {          self.public_id.as_ref()?; diff --git a/src/machine.rs b/src/machine.rs index 6c4558c..d175b8b 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -1239,10 +1239,14 @@ where          },          State::DoctypeName => match slf.read_char()? {              Some(whitespace_pat!()) => { +                slf.emitter +                    .terminate_doctype_name(slf.position_before_match);                  slf.state = State::AfterDoctypeName;                  Ok(ControlToken::Continue)              }              Some('>') => { +                slf.emitter +                    .terminate_doctype_name(slf.position_before_match);                  slf.state = State::Data;                  slf.emitter.emit_current_doctype(slf.reader.position());                  Ok(ControlToken::Continue) @@ -1254,6 +1258,8 @@ where              }              None => {                  slf.emit_error(Error::EofInDoctype); +                slf.emitter +                    .terminate_doctype_name(slf.position_before_match);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype(slf.reader.position());                  Ok(ControlToken::Eof) diff --git a/tests/test_spans.rs b/tests/test_spans.rs index 08b5a31..718a176 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -308,6 +308,9 @@ fn doctype_id_spans() {              };              let mut labels = Vec::new(); +            if let Some(name_span) = doctype.name_span() { +                labels.push((name_span, "name")); +            }              if let Some(public_id_span) = doctype.public_id_span() {                  labels.push((public_id_span, "public id"));              } @@ -322,9 +325,10 @@ fn doctype_id_spans() {      assert_snapshot!(annotated, @r###"      <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> -                           ^^^^^^^^^^^^^^^^^^^^^^^^^   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ system id -                           │ -                           public id +              ^^^^         ^^^^^^^^^^^^^^^^^^^^^^^^^   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ system id +              │            │ +              │            public id +              name      "###);  } | 
