diff options
-rw-r--r-- | CHANGELOG.md | 6 | ||||
-rw-r--r-- | src/attr.rs | 23 | ||||
-rw-r--r-- | src/emitter.rs | 15 | ||||
-rw-r--r-- | src/machine.rs | 6 | ||||
-rw-r--r-- | tests/test_spans.rs | 2 |
5 files changed, 36 insertions, 16 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index c7b0dc2..832f00d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,10 @@ * Removed `adjusted_current_node_present_and_not_in_html_namespace`. +* token types + + * `AttributeOwned`: The `value_offset` field has been replaced with `value_span`. + * Added missing `R: Position<O>` bounds for `Tokenizer`/`NaiveParser` constructors. (If you are able to construct a Tokenizer/NaiveParser, you should be able to iterate over it.) @@ -28,6 +32,8 @@ * Fixed `BufReadReader` skipping the whole line if it contained invalid UTF-8. +* Fixed attribute value spans being wrong for values containing character references. + ### 0.5.0 - 2023-08-19 #### Features diff --git a/src/attr.rs b/src/attr.rs index f7161cf..72dfcd9 100644 --- a/src/attr.rs +++ b/src/attr.rs @@ -32,10 +32,10 @@ pub(crate) struct AttrInternal<O> { pub value: String, /// The start offset of the attribute name. pub name_offset: O, - /// The start offset of the attribute value. - /// For the empty attribute syntax this is just `O::default()`. - /// We intentionally don't use `Option<O>` here to spare us a byte (and padding) per attribute. - pub value_offset: O, + /// The span of the attribute value. + /// For the empty attribute syntax this is just `O::default()..O::default()`. + /// We intentionally don't use `Option<Range<O>>` here to spare us a byte (and padding) per attribute. + pub value_span: Range<O>, pub value_syntax: Option<AttrValueSyntax>, } @@ -67,9 +67,9 @@ pub struct AttributeOwned<O> { pub value: String, /// The start offset of the attribute name. pub name_offset: O, - /// The start offset of the attribute value. + /// The span of the attribute value. /// `None` in case of the empty attribute syntax (e.g. `disabled` in `<input disabled>`). - pub value_offset: Option<O>, + pub value_span: Option<Range<O>>, /// The syntax of the attribute value. /// `None` indicates the empty attribute syntax (e.g. `disabled` in `<input disabled>`). pub value_syntax: Option<AttrValueSyntax>, @@ -104,14 +104,14 @@ impl<'a, O: Offset> Attribute<'a, O> { self.map_val.name_offset..self.map_val.name_offset + self.name.len() } - /// For explicitly defined values calculates the span of the attribute value and returns it. + /// For explicitly defined values returns the span of the attribute value. /// /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`). pub fn value_span(&self) -> Option<Range<O>> { if self.map_val.value_syntax.is_none() { return None; } - Some(self.map_val.value_offset..self.map_val.value_offset + self.map_val.value.len()) + Some(self.map_val.value_span.clone()) } /// Returns the attribute value syntax in case the value is explicitly defined. @@ -158,10 +158,7 @@ impl<O> Iterator for AttrIntoIter<O> { name, value: map_val.value, name_offset: map_val.name_offset, - value_offset: map_val - .value_syntax - .is_some() - .then_some(map_val.value_offset), + value_span: map_val.value_syntax.is_some().then_some(map_val.value_span), value_syntax: map_val.value_syntax, }) } @@ -200,7 +197,7 @@ impl<O: Default> FromIterator<(String, String)> for AttributeMap<O> { AttrInternal { value, name_offset: O::default(), - value_offset: O::default(), + value_span: O::default()..O::default(), value_syntax: Some(AttrValueSyntax::DoubleQuoted), }, ) diff --git a/src/emitter.rs b/src/emitter.rs index 6d9b60e..e56ddd5 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -78,6 +78,11 @@ pub trait Emitter<O> { /// If the current token is not a doctype, this method may panic. fn emit_current_doctype(&mut self, offset: O); + /// Called after the last [`push_attribute_value`] call for an attribute value. + /// + /// [`push_attribute_value`]: Self::push_attribute_value + fn terminate_attribute_value(&mut self, offset: O) {} + /// Assuming the _current token_ is a start tag, set the self-closing flag. /// /// If the current token is not a start or end tag, this method may panic. @@ -391,14 +396,14 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { crate::attr::AttrInternal { name_offset: offset, value: String::new(), - value_offset: O::default(), + value_span: O::default()..O::default(), value_syntax: None, }, )); } fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) { let (_, current_attribute) = self.current_attribute.as_mut().unwrap(); - current_attribute.value_offset = offset; + current_attribute.value_span.start = offset; current_attribute.value_syntax = Some(syntax); } @@ -410,6 +415,12 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { let current_attr = self.current_attribute.as_mut().unwrap(); current_attr.1.value.push_str(s); } + + fn terminate_attribute_value(&mut self, offset: O) { + let current_attr = self.current_attribute.as_mut().unwrap(); + current_attr.1.value_span.end = offset; + } + fn init_doctype_public_id(&mut self, offset: O) { let Some(Token::Doctype(doctype)) = &mut self.current_token else { debug_assert!(false); diff --git a/src/machine.rs b/src/machine.rs index 509dae5..159a8a0 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -790,6 +790,8 @@ where }, State::AttributeValueDoubleQuoted => match slf.read_char()? { Some('"') => { + slf.emitter + .terminate_attribute_value(slf.reader.position() - 1); slf.state = State::AfterAttributeValueQuoted; Ok(ControlToken::Continue) } @@ -814,6 +816,8 @@ where }, State::AttributeValueSingleQuoted => match slf.read_char()? { Some('\'') => { + slf.emitter + .terminate_attribute_value(slf.reader.position() - 1); slf.state = State::AfterAttributeValueQuoted; Ok(ControlToken::Continue) } @@ -838,6 +842,8 @@ where }, State::AttributeValueUnquoted => match slf.read_char()? { Some(whitespace_pat!()) => { + slf.emitter + .terminate_attribute_value(slf.reader.position() - 1); slf.state = State::BeforeAttributeName; Ok(ControlToken::Continue) } diff --git a/tests/test_spans.rs b/tests/test_spans.rs index bc6aeee..8f3dc8f 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -143,7 +143,7 @@ fn attribute_value_with_char_ref() { } assert_snapshot!(annotate(html, labels), @r###" <test x=& y='&' z="&"> - ^ ^ ^ + ^^^^^ ^^^^^ ^^^^^ "###); } |