about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/attr.rs 24
-rw-r--r-- src/emitter.rs 56
-rw-r--r-- src/machine.rs 7
3 files changed, 80 insertions, 7 deletions
diff --git a/src/attr.rs b/src/attr.rs
index 4c7e330..d062a84 100644
--- a/src/attr.rs
+++ b/src/attr.rs
@@ -34,6 +34,18 @@ pub(crate) struct AttrInternal<O> {
pub name_offset: O,
/// The start offset of the attribute value.
pub value_offset: O,
+ pub value_syntax: Option<AttrValueSyntax>,
+}
+
+/// The syntax of the attribute value.
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum AttrValueSyntax {
+ /// An unquoted attribute value, e.g. `id=foo`.
+ Unquoted,
+ /// A single-quoted attribute value, e.g. `id='foo'`.
+ SingleQuoted,
+ /// A double-quoted attribute value, e.g. `id="foo"`.
+ DoubleQuoted,
}
/// An HTML attribute borrowed from an [`AttributeMap`].
@@ -54,6 +66,9 @@ pub struct AttributeOwned<O> {
pub name_offset: O,
/// The start offset of the attribute value.
pub value_offset: O, // TODO: wrap this in an Option once we can recognize the empty attribute syntax
+ /// The syntax of the attribute value.
+ /// `None` indicates the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
+ pub value_syntax: Option<AttrValueSyntax>,
}
impl<O> AttributeMap<O> {
@@ -85,6 +100,13 @@ impl<'a, O: Offset> Attribute<'a, O> {
pub fn value_span(&self) -> Range<O> {
self.map_val.value_offset..self.map_val.value_offset + self.map_val.value.len()
}
+
+ /// Returns the attribute value syntax in case the value is explicitly defined.
+ ///
+ /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
+ pub fn value_syntax(&self) -> Option<AttrValueSyntax> {
+ self.map_val.value_syntax
+ }
}
// We cannot impl Index<Output=Attribute> because Index::index returns a reference of
@@ -120,6 +142,7 @@ impl<O> Iterator for AttrIntoIter<O> {
value: map_val.value,
name_offset: map_val.name_offset,
value_offset: map_val.value_offset,
+ value_syntax: map_val.value_syntax,
})
}
}
@@ -158,6 +181,7 @@ impl<O: Default> FromIterator<(String, String)> for AttributeMap<O> {
value,
name_offset: O::default(),
value_offset: O::default(),
+ value_syntax: Some(AttrValueSyntax::DoubleQuoted),
},
)
})
diff --git a/src/emitter.rs b/src/emitter.rs
index 17a4882..63ef4b1 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -4,6 +4,7 @@ use std::collections::VecDeque;
use std::mem;
use std::ops::Range;
+use crate::attr::AttrValueSyntax;
use crate::offset::NoopOffset;
use crate::offset::Offset;
use crate::Error;
@@ -121,11 +122,10 @@ pub trait Emitter<O> {
fn init_attribute_name(&mut self, offset: O);
/// Called before the first push_attribute_value call.
- /// If the value is wrappend in double or single quotes `quoted` is set to true, otherwise false.
///
/// If there is no current attribute, this method may panic.
#[allow(unused_variables)]
- fn init_attribute_value(&mut self, offset: O, quoted: bool) {}
+ fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {}
/// Append a string to the current attribute's name.
///
@@ -385,11 +385,14 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
name_offset: offset,
value: String::new(),
value_offset: O::default(),
+ value_syntax: None,
},
));
}
- fn init_attribute_value(&mut self, offset: O, _quoted: bool) {
- self.current_attribute.as_mut().unwrap().1.value_offset = offset;
+ fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {
+ let (_, current_attribute) = self.current_attribute.as_mut().unwrap();
+ current_attribute.value_offset = offset;
+ current_attribute.value_syntax = Some(syntax);
}
fn push_attribute_name(&mut self, s: &str) {
@@ -554,3 +557,48 @@ pub enum Token<O> {
span: Range<O>,
},
}
+
+/// The majority of our testing of the [`DefaultEmitter`] is done against the
+/// html5lib-tests in the html5lib integration test. This module only tests
+/// details that aren't present in the html5lib test data.
+#[cfg(test)]
+mod tests {
+ use super::{DefaultEmitter, Token};
+ use crate::{attr::AttrValueSyntax, Tokenizer};
+
+ #[test]
+ fn test_attribute_value_syntax() {
+ let mut tokenizer = Tokenizer::new(
+ "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">",
+ DefaultEmitter::default(),
+ )
+ .flatten();
+ let Token::StartTag(start_tag) = tokenizer.next().unwrap() else {
+ panic!("expected start tag");
+ };
+ assert_eq!(
+ start_tag.attributes.get("empty").unwrap().value_syntax(),
+ None
+ );
+ assert_eq!(
+ start_tag.attributes.get("unquoted").unwrap().value_syntax(),
+ Some(AttrValueSyntax::Unquoted)
+ );
+ assert_eq!(
+ start_tag
+ .attributes
+ .get("single-quoted")
+ .unwrap()
+ .value_syntax(),
+ Some(AttrValueSyntax::SingleQuoted)
+ );
+ assert_eq!(
+ start_tag
+ .attributes
+ .get("double-quoted")
+ .unwrap()
+ .value_syntax(),
+ Some(AttrValueSyntax::DoubleQuoted)
+ );
+ }
+}
diff --git a/src/machine.rs b/src/machine.rs
index a58a754..ccd3052 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1,3 +1,4 @@
+use crate::attr::AttrValueSyntax;
use crate::entities::try_read_character_reference;
use crate::offset::{Offset, Position};
use crate::utils::{
@@ -757,13 +758,13 @@ where
Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('"') => {
slf.emitter
- .init_attribute_value(slf.reader.position(), true);
+ .init_attribute_value(AttrValueSyntax::DoubleQuoted, slf.reader.position());
slf.state = State::AttributeValueDoubleQuoted;
Ok(ControlToken::Continue)
}
Some('\'') => {
slf.emitter
- .init_attribute_value(slf.reader.position(), true);
+ .init_attribute_value(AttrValueSyntax::SingleQuoted, slf.reader.position());
slf.state = State::AttributeValueSingleQuoted;
Ok(ControlToken::Continue)
}
@@ -775,7 +776,7 @@ where
}
c => {
slf.emitter
- .init_attribute_value(slf.reader.position() - 1, false);
+ .init_attribute_value(AttrValueSyntax::Unquoted, slf.reader.position() - 1);
slf.state = State::AttributeValueUnquoted;
slf.unread_char(c);
Ok(ControlToken::Continue)