diff options
author | Martin Fischer <martin@push-f.com> | 2023-08-12 08:15:45 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-08-19 13:41:55 +0200 |
commit | ae5d8185a5b419f89d520504c1cb4c59c26879bf (patch) | |
tree | 0198c59440111154e2aaca1e8d4e8fb0809699c4 /src | |
parent | 180f6d6111b966627aa00a4017b6fb9751f7386c (diff) |
feat: make attribute value syntax recognizable
Note that while making this breaking change, we're also swapping
the parameter order for more consistency so that the offset
parameter (the reader position) always comes last in Emitter methods.
Diffstat (limited to 'src')
-rw-r--r-- | src/attr.rs | 24 | ||||
-rw-r--r-- | src/emitter.rs | 56 | ||||
-rw-r--r-- | src/machine.rs | 7 |
3 files changed, 80 insertions, 7 deletions
diff --git a/src/attr.rs b/src/attr.rs index 4c7e330..d062a84 100644 --- a/src/attr.rs +++ b/src/attr.rs @@ -34,6 +34,18 @@ pub(crate) struct AttrInternal<O> { pub name_offset: O, /// The start offset of the attribute value. pub value_offset: O, + pub value_syntax: Option<AttrValueSyntax>, +} + +/// The syntax of the attribute value. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum AttrValueSyntax { + /// An unquoted attribute value, e.g. `id=foo`. + Unquoted, + /// A single-quoted attribute value, e.g. `id='foo'`. + SingleQuoted, + /// A double-quoted attribute value, e.g. `id="foo"`. + DoubleQuoted, } /// An HTML attribute borrowed from an [`AttributeMap`]. @@ -54,6 +66,9 @@ pub struct AttributeOwned<O> { pub name_offset: O, /// The start offset of the attribute value. pub value_offset: O, // TODO: wrap this in an Option once we can recognize the empty attribute syntax + /// The syntax of the attribute value. + /// `None` indicates the empty attribute syntax (e.g. `disabled` in `<input disabled>`). + pub value_syntax: Option<AttrValueSyntax>, } impl<O> AttributeMap<O> { @@ -85,6 +100,13 @@ impl<'a, O: Offset> Attribute<'a, O> { pub fn value_span(&self) -> Range<O> { self.map_val.value_offset..self.map_val.value_offset + self.map_val.value.len() } + + /// Returns the attribute value syntax in case the value is explicitly defined. + /// + /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`). 
+ pub fn value_syntax(&self) -> Option<AttrValueSyntax> { + self.map_val.value_syntax + } } // We cannot impl Index<Output=Attribute> because Index::index returns a reference of @@ -120,6 +142,7 @@ impl<O> Iterator for AttrIntoIter<O> { value: map_val.value, name_offset: map_val.name_offset, value_offset: map_val.value_offset, + value_syntax: map_val.value_syntax, }) } } @@ -158,6 +181,7 @@ impl<O: Default> FromIterator<(String, String)> for AttributeMap<O> { value, name_offset: O::default(), value_offset: O::default(), + value_syntax: Some(AttrValueSyntax::DoubleQuoted), }, ) }) diff --git a/src/emitter.rs b/src/emitter.rs index 17a4882..63ef4b1 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -4,6 +4,7 @@ use std::collections::VecDeque; use std::mem; use std::ops::Range; +use crate::attr::AttrValueSyntax; use crate::offset::NoopOffset; use crate::offset::Offset; use crate::Error; @@ -121,11 +122,10 @@ pub trait Emitter<O> { fn init_attribute_name(&mut self, offset: O); /// Called before the first push_attribute_value call. - /// If the value is wrappend in double or single quotes `quoted` is set to true, otherwise false. /// /// If there is no current attribute, this method may panic. #[allow(unused_variables)] - fn init_attribute_value(&mut self, offset: O, quoted: bool) {} + fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {} /// Append a string to the current attribute's name. 
/// @@ -385,11 +385,14 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { name_offset: offset, value: String::new(), value_offset: O::default(), + value_syntax: None, }, )); } - fn init_attribute_value(&mut self, offset: O, _quoted: bool) { - self.current_attribute.as_mut().unwrap().1.value_offset = offset; + fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) { + let (_, current_attribute) = self.current_attribute.as_mut().unwrap(); + current_attribute.value_offset = offset; + current_attribute.value_syntax = Some(syntax); } fn push_attribute_name(&mut self, s: &str) { @@ -554,3 +557,48 @@ pub enum Token<O> { span: Range<O>, }, } + +/// The majority of our testing of the [`DefaultEmitter`] is done against the +/// html5lib-tests in the html5lib integration test. This module only tests +/// details that aren't present in the html5lib test data. +#[cfg(test)] +mod tests { + use super::{DefaultEmitter, Token}; + use crate::{attr::AttrValueSyntax, Tokenizer}; + + #[test] + fn test_attribute_value_syntax() { + let mut tokenizer = Tokenizer::new( + "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">", + DefaultEmitter::default(), + ) + .flatten(); + let Token::StartTag(start_tag) = tokenizer.next().unwrap() else { + panic!("expected start tag"); + }; + assert_eq!( + start_tag.attributes.get("empty").unwrap().value_syntax(), + None + ); + assert_eq!( + start_tag.attributes.get("unquoted").unwrap().value_syntax(), + Some(AttrValueSyntax::Unquoted) + ); + assert_eq!( + start_tag + .attributes + .get("single-quoted") + .unwrap() + .value_syntax(), + Some(AttrValueSyntax::SingleQuoted) + ); + assert_eq!( + start_tag + .attributes + .get("double-quoted") + .unwrap() + .value_syntax(), + Some(AttrValueSyntax::DoubleQuoted) + ); + } +} diff --git a/src/machine.rs b/src/machine.rs index a58a754..ccd3052 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -1,3 +1,4 @@ +use crate::attr::AttrValueSyntax; use 
crate::entities::try_read_character_reference; use crate::offset::{Offset, Position}; use crate::utils::{ @@ -757,13 +758,13 @@ where Some(whitespace_pat!()) => Ok(ControlToken::Continue), Some('"') => { slf.emitter - .init_attribute_value(slf.reader.position(), true); + .init_attribute_value(AttrValueSyntax::DoubleQuoted, slf.reader.position()); slf.state = State::AttributeValueDoubleQuoted; Ok(ControlToken::Continue) } Some('\'') => { slf.emitter - .init_attribute_value(slf.reader.position(), true); + .init_attribute_value(AttrValueSyntax::SingleQuoted, slf.reader.position()); slf.state = State::AttributeValueSingleQuoted; Ok(ControlToken::Continue) } @@ -775,7 +776,7 @@ where } c => { slf.emitter - .init_attribute_value(slf.reader.position() - 1, false); + .init_attribute_value(AttrValueSyntax::Unquoted, slf.reader.position() - 1); slf.state = State::AttributeValueUnquoted; slf.unread_char(c); Ok(ControlToken::Continue) |