diff options
| author | Martin Fischer <martin@push-f.com> | 2023-08-31 05:18:49 +0200 | 
|---|---|---|
| committer | Martin Fischer <martin@push-f.com> | 2023-09-03 23:00:05 +0200 | 
| commit | d45e09a3157c71e3c95bf618a2b181f322c87a5d (patch) | |
| tree | 0093a5b86f9234e7a92ba64e2112823159dc1f29 | |
| parent | 42a18751eeaaa1637a5446bc5e4965815aa959c0 (diff) | |
fix!: wrong attribute value spans for char refs
| -rw-r--r-- | CHANGELOG.md | 6 | ||||
| -rw-r--r-- | src/attr.rs | 23 | ||||
| -rw-r--r-- | src/emitter.rs | 15 | ||||
| -rw-r--r-- | src/machine.rs | 6 | ||||
| -rw-r--r-- | tests/test_spans.rs | 2 | 
5 files changed, 36 insertions, 16 deletions
| diff --git a/CHANGELOG.md b/CHANGELOG.md index c7b0dc2..832f00d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,10 @@    * Removed `adjusted_current_node_present_and_not_in_html_namespace`. +* token types + +  * `AttributeOwned`: The `value_offset` field has been replaced with `value_span`. +  * Added missing `R: Position<O>` bounds for `Tokenizer`/`NaiveParser` constructors.      (If you are able to construct a Tokenizer/NaiveParser,    you should be able to iterate over it.) @@ -28,6 +32,8 @@  * Fixed `BufReadReader` skipping the whole line if it contained invalid UTF-8. +* Fixed attribute value spans being wrong for values containing character references. +  ### 0.5.0 - 2023-08-19  #### Features diff --git a/src/attr.rs b/src/attr.rs index f7161cf..72dfcd9 100644 --- a/src/attr.rs +++ b/src/attr.rs @@ -32,10 +32,10 @@ pub(crate) struct AttrInternal<O> {      pub value: String,      /// The start offset of the attribute name.      pub name_offset: O, -    /// The start offset of the attribute value. -    /// For the empty attribute syntax this is just `O::default()`. -    /// We intentionally don't use `Option<O>` here to spare us a byte (and padding) per attribute. -    pub value_offset: O, +    /// The span of the attribute value. +    /// For the empty attribute syntax this is just `O::default()..O::default()`. +    /// We intentionally don't use `Option<Range<O>>` here to spare us a byte (and padding) per attribute. +    pub value_span: Range<O>,      pub value_syntax: Option<AttrValueSyntax>,  } @@ -67,9 +67,9 @@ pub struct AttributeOwned<O> {      pub value: String,      /// The start offset of the attribute name.      pub name_offset: O, -    /// The start offset of the attribute value. +    /// The span of the attribute value.      /// `None` in case of the empty attribute syntax (e.g. `disabled` in `<input disabled>`). -    pub value_offset: Option<O>, +    pub value_span: Option<Range<O>>,      /// The syntax of the attribute value.      /// `None` indicates the empty attribute syntax (e.g. `disabled` in `<input disabled>`).      pub value_syntax: Option<AttrValueSyntax>, @@ -104,14 +104,14 @@ impl<'a, O: Offset> Attribute<'a, O> {          self.map_val.name_offset..self.map_val.name_offset + self.name.len()      } -    /// For explicitly defined values calculates the span of the attribute value and returns it. +    /// For explicitly defined values returns the span of the attribute value.      ///      /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`).      pub fn value_span(&self) -> Option<Range<O>> {          if self.map_val.value_syntax.is_none() {              return None;          } -        Some(self.map_val.value_offset..self.map_val.value_offset + self.map_val.value.len()) +        Some(self.map_val.value_span.clone())      }      /// Returns the attribute value syntax in case the value is explicitly defined. @@ -158,10 +158,7 @@ impl<O> Iterator for AttrIntoIter<O> {              name,              value: map_val.value,              name_offset: map_val.name_offset, -            value_offset: map_val -                .value_syntax -                .is_some() -                .then_some(map_val.value_offset), +            value_span: map_val.value_syntax.is_some().then_some(map_val.value_span),              value_syntax: map_val.value_syntax,          })      } @@ -200,7 +197,7 @@ impl<O: Default> FromIterator<(String, String)> for AttributeMap<O> {                          AttrInternal {                              value,                              name_offset: O::default(), -                            value_offset: O::default(), +                            value_span: O::default()..O::default(),                              value_syntax: Some(AttrValueSyntax::DoubleQuoted),                          },                      ) diff --git a/src/emitter.rs b/src/emitter.rs index 6d9b60e..e56ddd5 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -78,6 +78,11 @@ pub trait Emitter<O> {      /// If the current token is not a doctype, this method may panic.      fn emit_current_doctype(&mut self, offset: O); +    /// Called after the last [`push_attribute_value`] call for an attribute value. +    /// +    /// [`push_attribute_value`]: Self::push_attribute_value +    fn terminate_attribute_value(&mut self, offset: O) {} +      /// Assuming the _current token_ is a start tag, set the self-closing flag.      ///      /// If the current token is not a start or end tag, this method may panic. @@ -391,14 +396,14 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {              crate::attr::AttrInternal {                  name_offset: offset,                  value: String::new(), -                value_offset: O::default(), +                value_span: O::default()..O::default(),                  value_syntax: None,              },          ));      }      fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {          let (_, current_attribute) = self.current_attribute.as_mut().unwrap(); -        current_attribute.value_offset = offset; +        current_attribute.value_span.start = offset;          current_attribute.value_syntax = Some(syntax);      } @@ -410,6 +415,12 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {          let current_attr = self.current_attribute.as_mut().unwrap();          current_attr.1.value.push_str(s);      } + +    fn terminate_attribute_value(&mut self, offset: O) { +        let current_attr = self.current_attribute.as_mut().unwrap(); +        current_attr.1.value_span.end = offset; +    } +      fn init_doctype_public_id(&mut self, offset: O) {          let Some(Token::Doctype(doctype)) = &mut self.current_token else {              debug_assert!(false); diff --git a/src/machine.rs b/src/machine.rs index 509dae5..159a8a0 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -790,6 +790,8 @@ where          },          State::AttributeValueDoubleQuoted => match slf.read_char()? {              Some('"') => { +                slf.emitter +                    .terminate_attribute_value(slf.reader.position() - 1);                  slf.state = State::AfterAttributeValueQuoted;                  Ok(ControlToken::Continue)              } @@ -814,6 +816,8 @@ where          },          State::AttributeValueSingleQuoted => match slf.read_char()? {              Some('\'') => { +                slf.emitter +                    .terminate_attribute_value(slf.reader.position() - 1);                  slf.state = State::AfterAttributeValueQuoted;                  Ok(ControlToken::Continue)              } @@ -838,6 +842,8 @@ where          },          State::AttributeValueUnquoted => match slf.read_char()? {              Some(whitespace_pat!()) => { +                slf.emitter +                    .terminate_attribute_value(slf.reader.position() - 1);                  slf.state = State::BeforeAttributeName;                  Ok(ControlToken::Continue)              } diff --git a/tests/test_spans.rs b/tests/test_spans.rs index bc6aeee..8f3dc8f 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -143,7 +143,7 @@ fn attribute_value_with_char_ref() {      }      assert_snapshot!(annotate(html, labels), @r###"      <test x=& y='&' z="&"> -            ^        ^         ^ +            ^^^^^    ^^^^^     ^^^^^      "###);  } | 
