summary refs log tree commit diff
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2023-08-12 08:15:45 +0200
committer Martin Fischer <martin@push-f.com> 2023-08-19 13:41:55 +0200
commit ae5d8185a5b419f89d520504c1cb4c59c26879bf (patch)
tree 0198c59440111154e2aaca1e8d4e8fb0809699c4
parent 180f6d6111b966627aa00a4017b6fb9751f7386c (diff)
feat: make attribute value syntax recognizable
Note that while making this breaking change, we're also swapping the parameter order for more consistency so that the reader parameter always comes last in Emitter methods.
-rw-r--r-- src/attr.rs 24
-rw-r--r-- src/emitter.rs 56
-rw-r--r-- src/machine.rs 7
3 files changed, 80 insertions, 7 deletions
diff --git a/src/attr.rs b/src/attr.rs
index 4c7e330..d062a84 100644
--- a/src/attr.rs
+++ b/src/attr.rs
@@ -34,6 +34,18 @@ pub(crate) struct AttrInternal<O> {
pub name_offset: O,
/// The start offset of the attribute value.
pub value_offset: O,
+ pub value_syntax: Option<AttrValueSyntax>,
+}
+
+/// The syntax of the attribute value.
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum AttrValueSyntax {
+ /// An unquoted attribute value, e.g. `id=foo`.
+ Unquoted,
+ /// A single-quoted attribute value, e.g. `id='foo'`.
+ SingleQuoted,
+ /// A double-quoted attribute value, e.g. `id="foo"`.
+ DoubleQuoted,
}
/// An HTML attribute borrowed from an [`AttributeMap`].
@@ -54,6 +66,9 @@ pub struct AttributeOwned<O> {
pub name_offset: O,
/// The start offset of the attribute value.
pub value_offset: O, // TODO: wrap this in an Option once we can recognize the empty attribute syntax
+ /// The syntax of the attribute value.
+ /// `None` indicates the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
+ pub value_syntax: Option<AttrValueSyntax>,
}
impl<O> AttributeMap<O> {
@@ -85,6 +100,13 @@ impl<'a, O: Offset> Attribute<'a, O> {
pub fn value_span(&self) -> Range<O> {
self.map_val.value_offset..self.map_val.value_offset + self.map_val.value.len()
}
+
+ /// Returns the attribute value syntax in case the value is explicitly defined.
+ ///
+ /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
+ pub fn value_syntax(&self) -> Option<AttrValueSyntax> {
+ self.map_val.value_syntax
+ }
}
// We cannot impl Index<Output=Attribute> because Index::index returns a reference of
@@ -120,6 +142,7 @@ impl<O> Iterator for AttrIntoIter<O> {
value: map_val.value,
name_offset: map_val.name_offset,
value_offset: map_val.value_offset,
+ value_syntax: map_val.value_syntax,
})
}
}
@@ -158,6 +181,7 @@ impl<O: Default> FromIterator<(String, String)> for AttributeMap<O> {
value,
name_offset: O::default(),
value_offset: O::default(),
+ value_syntax: Some(AttrValueSyntax::DoubleQuoted),
},
)
})
diff --git a/src/emitter.rs b/src/emitter.rs
index 17a4882..63ef4b1 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -4,6 +4,7 @@ use std::collections::VecDeque;
use std::mem;
use std::ops::Range;
+use crate::attr::AttrValueSyntax;
use crate::offset::NoopOffset;
use crate::offset::Offset;
use crate::Error;
@@ -121,11 +122,10 @@ pub trait Emitter<O> {
fn init_attribute_name(&mut self, offset: O);
/// Called before the first push_attribute_value call.
- /// If the value is wrappend in double or single quotes `quoted` is set to true, otherwise false.
///
/// If there is no current attribute, this method may panic.
#[allow(unused_variables)]
- fn init_attribute_value(&mut self, offset: O, quoted: bool) {}
+ fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {}
/// Append a string to the current attribute's name.
///
@@ -385,11 +385,14 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
name_offset: offset,
value: String::new(),
value_offset: O::default(),
+ value_syntax: None,
},
));
}
- fn init_attribute_value(&mut self, offset: O, _quoted: bool) {
- self.current_attribute.as_mut().unwrap().1.value_offset = offset;
+ fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {
+ let (_, current_attribute) = self.current_attribute.as_mut().unwrap();
+ current_attribute.value_offset = offset;
+ current_attribute.value_syntax = Some(syntax);
}
fn push_attribute_name(&mut self, s: &str) {
@@ -554,3 +557,48 @@ pub enum Token<O> {
span: Range<O>,
},
}
+
+/// The majority of our testing of the [`DefaultEmitter`] is done against the
+/// html5lib-tests in the html5lib integration test. This module only tests
+/// details that aren't present in the html5lib test data.
+#[cfg(test)]
+mod tests {
+ use super::{DefaultEmitter, Token};
+ use crate::{attr::AttrValueSyntax, Tokenizer};
+
+ #[test]
+ fn test_attribute_value_syntax() {
+ let mut tokenizer = Tokenizer::new(
+ "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">",
+ DefaultEmitter::default(),
+ )
+ .flatten();
+ let Token::StartTag(start_tag) = tokenizer.next().unwrap() else {
+ panic!("expected start tag");
+ };
+ assert_eq!(
+ start_tag.attributes.get("empty").unwrap().value_syntax(),
+ None
+ );
+ assert_eq!(
+ start_tag.attributes.get("unquoted").unwrap().value_syntax(),
+ Some(AttrValueSyntax::Unquoted)
+ );
+ assert_eq!(
+ start_tag
+ .attributes
+ .get("single-quoted")
+ .unwrap()
+ .value_syntax(),
+ Some(AttrValueSyntax::SingleQuoted)
+ );
+ assert_eq!(
+ start_tag
+ .attributes
+ .get("double-quoted")
+ .unwrap()
+ .value_syntax(),
+ Some(AttrValueSyntax::DoubleQuoted)
+ );
+ }
+}
diff --git a/src/machine.rs b/src/machine.rs
index a58a754..ccd3052 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1,3 +1,4 @@
+use crate::attr::AttrValueSyntax;
use crate::entities::try_read_character_reference;
use crate::offset::{Offset, Position};
use crate::utils::{
@@ -757,13 +758,13 @@ where
Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('"') => {
slf.emitter
- .init_attribute_value(slf.reader.position(), true);
+ .init_attribute_value(AttrValueSyntax::DoubleQuoted, slf.reader.position());
slf.state = State::AttributeValueDoubleQuoted;
Ok(ControlToken::Continue)
}
Some('\'') => {
slf.emitter
- .init_attribute_value(slf.reader.position(), true);
+ .init_attribute_value(AttrValueSyntax::SingleQuoted, slf.reader.position());
slf.state = State::AttributeValueSingleQuoted;
Ok(ControlToken::Continue)
}
@@ -775,7 +776,7 @@ where
}
c => {
slf.emitter
- .init_attribute_value(slf.reader.position() - 1, false);
+ .init_attribute_value(AttrValueSyntax::Unquoted, slf.reader.position() - 1);
slf.state = State::AttributeValueUnquoted;
slf.unread_char(c);
Ok(ControlToken::Continue)