diff options
-rw-r--r-- | src/attr.rs | 24 | ||||
-rw-r--r-- | src/emitter.rs | 56 | ||||
-rw-r--r-- | src/machine.rs | 7 |
3 files changed, 80 insertions, 7 deletions
diff --git a/src/attr.rs b/src/attr.rs index 4c7e330..d062a84 100644 --- a/src/attr.rs +++ b/src/attr.rs @@ -34,6 +34,18 @@ pub(crate) struct AttrInternal<O> { pub name_offset: O, /// The start offset of the attribute value. pub value_offset: O, + pub value_syntax: Option<AttrValueSyntax>, +} + +/// The syntax of the attribute value. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum AttrValueSyntax { + /// An unquoted attribute value, e.g. `id=foo`. + Unquoted, + /// A single-quoted attribute value, e.g. `id='foo'`. + SingleQuoted, + /// A double-quoted attribute value, e.g. `id="foo"`. + DoubleQuoted, } /// An HTML attribute borrowed from an [`AttributeMap`]. @@ -54,6 +66,9 @@ pub struct AttributeOwned<O> { pub name_offset: O, /// The start offset of the attribute value. pub value_offset: O, // TODO: wrap this in an Option once we can recognize the empty attribute syntax + /// The syntax of the attribute value. + /// `None` indicates the empty attribute syntax (e.g. `disabled` in `<input disabled>`). + pub value_syntax: Option<AttrValueSyntax>, } impl<O> AttributeMap<O> { @@ -85,6 +100,13 @@ impl<'a, O: Offset> Attribute<'a, O> { pub fn value_span(&self) -> Range<O> { self.map_val.value_offset..self.map_val.value_offset + self.map_val.value.len() } + + /// Returns the attribute value syntax in case the value is explicitly defined. + /// + /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`). + pub fn value_syntax(&self) -> Option<AttrValueSyntax> { + self.map_val.value_syntax + } } // We cannot impl Index<Output=Attribute> because Index::index returns a reference of @@ -120,6 +142,7 @@ impl<O> Iterator for AttrIntoIter<O> { value: map_val.value, name_offset: map_val.name_offset, value_offset: map_val.value_offset, + value_syntax: map_val.value_syntax, }) } } @@ -158,6 +181,7 @@ impl<O: Default> FromIterator<(String, String)> for AttributeMap<O> { value, name_offset: O::default(), value_offset: O::default(), + value_syntax: Some(AttrValueSyntax::DoubleQuoted), }, ) }) diff --git a/src/emitter.rs b/src/emitter.rs index 17a4882..63ef4b1 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -4,6 +4,7 @@ use std::collections::VecDeque; use std::mem; use std::ops::Range; +use crate::attr::AttrValueSyntax; use crate::offset::NoopOffset; use crate::offset::Offset; use crate::Error; @@ -121,11 +122,10 @@ pub trait Emitter<O> { fn init_attribute_name(&mut self, offset: O); /// Called before the first push_attribute_value call. - /// If the value is wrappend in double or single quotes `quoted` is set to true, otherwise false. /// /// If there is no current attribute, this method may panic. #[allow(unused_variables)] - fn init_attribute_value(&mut self, offset: O, quoted: bool) {} + fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {} /// Append a string to the current attribute's name. /// @@ -385,11 +385,14 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { name_offset: offset, value: String::new(), value_offset: O::default(), + value_syntax: None, }, )); } - fn init_attribute_value(&mut self, offset: O, _quoted: bool) { - self.current_attribute.as_mut().unwrap().1.value_offset = offset; + fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) { + let (_, current_attribute) = self.current_attribute.as_mut().unwrap(); + current_attribute.value_offset = offset; + current_attribute.value_syntax = Some(syntax); } fn push_attribute_name(&mut self, s: &str) { @@ -554,3 +557,48 @@ pub enum Token<O> { span: Range<O>, }, } + +/// The majority of our testing of the [`DefaultEmitter`] is done against the +/// html5lib-tests in the html5lib integration test. This module only tests +/// details that aren't present in the html5lib test data. +#[cfg(test)] +mod tests { + use super::{DefaultEmitter, Token}; + use crate::{attr::AttrValueSyntax, Tokenizer}; + + #[test] + fn test_attribute_value_syntax() { + let mut tokenizer = Tokenizer::new( + "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">", + DefaultEmitter::default(), + ) + .flatten(); + let Token::StartTag(start_tag) = tokenizer.next().unwrap() else { + panic!("expected start tag"); + }; + assert_eq!( + start_tag.attributes.get("empty").unwrap().value_syntax(), + None + ); + assert_eq!( + start_tag.attributes.get("unquoted").unwrap().value_syntax(), + Some(AttrValueSyntax::Unquoted) + ); + assert_eq!( + start_tag + .attributes + .get("single-quoted") + .unwrap() + .value_syntax(), + Some(AttrValueSyntax::SingleQuoted) + ); + assert_eq!( + start_tag + .attributes + .get("double-quoted") + .unwrap() + .value_syntax(), + Some(AttrValueSyntax::DoubleQuoted) + ); + } +} diff --git a/src/machine.rs b/src/machine.rs index a58a754..ccd3052 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -1,3 +1,4 @@ +use crate::attr::AttrValueSyntax; use crate::entities::try_read_character_reference; use crate::offset::{Offset, Position}; use crate::utils::{ @@ -757,13 +758,13 @@ where Some(whitespace_pat!()) => Ok(ControlToken::Continue), Some('"') => { slf.emitter - .init_attribute_value(slf.reader.position(), true); + .init_attribute_value(AttrValueSyntax::DoubleQuoted, slf.reader.position()); slf.state = State::AttributeValueDoubleQuoted; Ok(ControlToken::Continue) } Some('\'') => { slf.emitter - .init_attribute_value(slf.reader.position(), true); + .init_attribute_value(AttrValueSyntax::SingleQuoted, slf.reader.position()); slf.state = State::AttributeValueSingleQuoted; Ok(ControlToken::Continue) } @@ -775,7 +776,7 @@ where } c => { slf.emitter - .init_attribute_value(slf.reader.position() - 1, false); + .init_attribute_value(AttrValueSyntax::Unquoted, slf.reader.position() - 1); slf.state = State::AttributeValueUnquoted; slf.unread_char(c); Ok(ControlToken::Continue) |