aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Fischer <martin@push-f.com>2023-08-31 05:18:49 +0200
committerMartin Fischer <martin@push-f.com>2023-09-03 23:00:05 +0200
commitd45e09a3157c71e3c95bf618a2b181f322c87a5d (patch)
tree0093a5b86f9234e7a92ba64e2112823159dc1f29
parent42a18751eeaaa1637a5446bc5e4965815aa959c0 (diff)
fix!: wrong attribute value spans for char refs
-rw-r--r--CHANGELOG.md6
-rw-r--r--src/attr.rs23
-rw-r--r--src/emitter.rs15
-rw-r--r--src/machine.rs6
-rw-r--r--tests/test_spans.rs2
5 files changed, 36 insertions, 16 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c7b0dc2..832f00d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,10 @@
* Removed `adjusted_current_node_present_and_not_in_html_namespace`.
+* token types
+
+ * `AttributeOwned`: The `value_offset` field has been replaced with `value_span`.
+
* Added missing `R: Position<O>` bounds for `Tokenizer`/`NaiveParser` constructors.
(If you are able to construct a Tokenizer/NaiveParser,
you should be able to iterate over it.)
@@ -28,6 +32,8 @@
* Fixed `BufReadReader` skipping the whole line if it contained invalid UTF-8.
+* Fixed attribute value spans being wrong for values containing character references.
+
### 0.5.0 - 2023-08-19
#### Features
diff --git a/src/attr.rs b/src/attr.rs
index f7161cf..72dfcd9 100644
--- a/src/attr.rs
+++ b/src/attr.rs
@@ -32,10 +32,10 @@ pub(crate) struct AttrInternal<O> {
pub value: String,
/// The start offset of the attribute name.
pub name_offset: O,
- /// The start offset of the attribute value.
- /// For the empty attribute syntax this is just `O::default()`.
- /// We intentionally don't use `Option<O>` here to spare us a byte (and padding) per attribute.
- pub value_offset: O,
+ /// The span of the attribute value.
+ /// For the empty attribute syntax this is just `O::default()..O::default()`.
+ /// We intentionally don't use `Option<Range<O>>` here to spare us a byte (and padding) per attribute.
+ pub value_span: Range<O>,
pub value_syntax: Option<AttrValueSyntax>,
}
@@ -67,9 +67,9 @@ pub struct AttributeOwned<O> {
pub value: String,
/// The start offset of the attribute name.
pub name_offset: O,
- /// The start offset of the attribute value.
+ /// The span of the attribute value.
/// `None` in case of the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
- pub value_offset: Option<O>,
+ pub value_span: Option<Range<O>>,
/// The syntax of the attribute value.
/// `None` indicates the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
pub value_syntax: Option<AttrValueSyntax>,
@@ -104,14 +104,14 @@ impl<'a, O: Offset> Attribute<'a, O> {
self.map_val.name_offset..self.map_val.name_offset + self.name.len()
}
- /// For explicitly defined values calculates the span of the attribute value and returns it.
+ /// For explicitly defined values returns the span of the attribute value.
///
/// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
pub fn value_span(&self) -> Option<Range<O>> {
if self.map_val.value_syntax.is_none() {
return None;
}
- Some(self.map_val.value_offset..self.map_val.value_offset + self.map_val.value.len())
+ Some(self.map_val.value_span.clone())
}
/// Returns the attribute value syntax in case the value is explicitly defined.
@@ -158,10 +158,7 @@ impl<O> Iterator for AttrIntoIter<O> {
name,
value: map_val.value,
name_offset: map_val.name_offset,
- value_offset: map_val
- .value_syntax
- .is_some()
- .then_some(map_val.value_offset),
+ value_span: map_val.value_syntax.is_some().then_some(map_val.value_span),
value_syntax: map_val.value_syntax,
})
}
@@ -200,7 +197,7 @@ impl<O: Default> FromIterator<(String, String)> for AttributeMap<O> {
AttrInternal {
value,
name_offset: O::default(),
- value_offset: O::default(),
+ value_span: O::default()..O::default(),
value_syntax: Some(AttrValueSyntax::DoubleQuoted),
},
)
diff --git a/src/emitter.rs b/src/emitter.rs
index 6d9b60e..e56ddd5 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -78,6 +78,11 @@ pub trait Emitter<O> {
/// If the current token is not a doctype, this method may panic.
fn emit_current_doctype(&mut self, offset: O);
+ /// Called after the last [`push_attribute_value`] call for an attribute value.
+ ///
+ /// [`push_attribute_value`]: Self::push_attribute_value
+ fn terminate_attribute_value(&mut self, offset: O) {}
+
/// Assuming the _current token_ is a start tag, set the self-closing flag.
///
/// If the current token is not a start or end tag, this method may panic.
@@ -391,14 +396,14 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
crate::attr::AttrInternal {
name_offset: offset,
value: String::new(),
- value_offset: O::default(),
+ value_span: O::default()..O::default(),
value_syntax: None,
},
));
}
fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {
let (_, current_attribute) = self.current_attribute.as_mut().unwrap();
- current_attribute.value_offset = offset;
+ current_attribute.value_span.start = offset;
current_attribute.value_syntax = Some(syntax);
}
@@ -410,6 +415,12 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
let current_attr = self.current_attribute.as_mut().unwrap();
current_attr.1.value.push_str(s);
}
+
+ fn terminate_attribute_value(&mut self, offset: O) {
+ let current_attr = self.current_attribute.as_mut().unwrap();
+ current_attr.1.value_span.end = offset;
+ }
+
fn init_doctype_public_id(&mut self, offset: O) {
let Some(Token::Doctype(doctype)) = &mut self.current_token else {
debug_assert!(false);
diff --git a/src/machine.rs b/src/machine.rs
index 509dae5..159a8a0 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -790,6 +790,8 @@ where
},
State::AttributeValueDoubleQuoted => match slf.read_char()? {
Some('"') => {
+ slf.emitter
+ .terminate_attribute_value(slf.reader.position() - 1);
slf.state = State::AfterAttributeValueQuoted;
Ok(ControlToken::Continue)
}
@@ -814,6 +816,8 @@ where
},
State::AttributeValueSingleQuoted => match slf.read_char()? {
Some('\'') => {
+ slf.emitter
+ .terminate_attribute_value(slf.reader.position() - 1);
slf.state = State::AfterAttributeValueQuoted;
Ok(ControlToken::Continue)
}
@@ -838,6 +842,8 @@ where
},
State::AttributeValueUnquoted => match slf.read_char()? {
Some(whitespace_pat!()) => {
+ slf.emitter
+ .terminate_attribute_value(slf.reader.position() - 1);
slf.state = State::BeforeAttributeName;
Ok(ControlToken::Continue)
}
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
index bc6aeee..8f3dc8f 100644
--- a/tests/test_spans.rs
+++ b/tests/test_spans.rs
@@ -143,7 +143,7 @@ fn attribute_value_with_char_ref() {
}
assert_snapshot!(annotate(html, labels), @r###"
<test x=&amp; y='&amp;' z="&amp;">
- ^ ^ ^
+ ^^^^^ ^^^^^ ^^^^^
"###);
}