diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/attr.rs | 24 | ||||
| -rw-r--r-- | src/emitter.rs | 56 | ||||
| -rw-r--r-- | src/machine.rs | 7 | 
3 files changed, 80 insertions, 7 deletions
| diff --git a/src/attr.rs b/src/attr.rs index 4c7e330..d062a84 100644 --- a/src/attr.rs +++ b/src/attr.rs @@ -34,6 +34,18 @@ pub(crate) struct AttrInternal<O> {      pub name_offset: O,      /// The start offset of the attribute value.      pub value_offset: O, +    pub value_syntax: Option<AttrValueSyntax>, +} + +/// The syntax of the attribute value. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum AttrValueSyntax { +    /// An unquoted attribute value, e.g. `id=foo`. +    Unquoted, +    /// A single-quoted attribute value, e.g. `id='foo'`. +    SingleQuoted, +    /// A double-quoted attribute value, e.g. `id="foo"`. +    DoubleQuoted,  }  /// An HTML attribute borrowed from an [`AttributeMap`]. @@ -54,6 +66,9 @@ pub struct AttributeOwned<O> {      pub name_offset: O,      /// The start offset of the attribute value.      pub value_offset: O, // TODO: wrap this in an Option once we can recognize the empty attribute syntax +    /// The syntax of the attribute value. +    /// `None` indicates the empty attribute syntax (e.g. `disabled` in `<input disabled>`). +    pub value_syntax: Option<AttrValueSyntax>,  }  impl<O> AttributeMap<O> { @@ -85,6 +100,13 @@ impl<'a, O: Offset> Attribute<'a, O> {      pub fn value_span(&self) -> Range<O> {          self.map_val.value_offset..self.map_val.value_offset + self.map_val.value.len()      } + +    /// Returns the attribute value syntax in case the value is explicitly defined. +    /// +    /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`). +    pub fn value_syntax(&self) -> Option<AttrValueSyntax> { +        self.map_val.value_syntax +    }  }  // We cannot impl Index<Output=Attribute> because Index::index returns a reference of @@ -120,6 +142,7 @@ impl<O> Iterator for AttrIntoIter<O> {              value: map_val.value,              name_offset: map_val.name_offset,              value_offset: map_val.value_offset, +            value_syntax: map_val.value_syntax,          })      }  } @@ -158,6 +181,7 @@ impl<O: Default> FromIterator<(String, String)> for AttributeMap<O> {                              value,                              name_offset: O::default(),                              value_offset: O::default(), +                            value_syntax: Some(AttrValueSyntax::DoubleQuoted),                          },                      )                  }) diff --git a/src/emitter.rs b/src/emitter.rs index 17a4882..63ef4b1 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -4,6 +4,7 @@ use std::collections::VecDeque;  use std::mem;  use std::ops::Range; +use crate::attr::AttrValueSyntax;  use crate::offset::NoopOffset;  use crate::offset::Offset;  use crate::Error; @@ -121,11 +122,10 @@ pub trait Emitter<O> {      fn init_attribute_name(&mut self, offset: O);      /// Called before the first push_attribute_value call. -    /// If the value is wrappend in double or single quotes `quoted` is set to true, otherwise false.      ///      /// If there is no current attribute, this method may panic.      #[allow(unused_variables)] -    fn init_attribute_value(&mut self, offset: O, quoted: bool) {} +    fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {}      /// Append a string to the current attribute's name.      /// @@ -385,11 +385,14 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {                  name_offset: offset,                  value: String::new(),                  value_offset: O::default(), +                value_syntax: None,              },          ));      } -    fn init_attribute_value(&mut self, offset: O, _quoted: bool) { -        self.current_attribute.as_mut().unwrap().1.value_offset = offset; +    fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) { +        let (_, current_attribute) = self.current_attribute.as_mut().unwrap(); +        current_attribute.value_offset = offset; +        current_attribute.value_syntax = Some(syntax);      }      fn push_attribute_name(&mut self, s: &str) { @@ -554,3 +557,48 @@ pub enum Token<O> {          span: Range<O>,      },  } + +/// The majority of our testing of the [`DefaultEmitter`] is done against the +/// html5lib-tests in the html5lib integration test. This module only tests +/// details that aren't present in the html5lib test data. +#[cfg(test)] +mod tests { +    use super::{DefaultEmitter, Token}; +    use crate::{attr::AttrValueSyntax, Tokenizer}; + +    #[test] +    fn test_attribute_value_syntax() { +        let mut tokenizer = Tokenizer::new( +            "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">", +            DefaultEmitter::default(), +        ) +        .flatten(); +        let Token::StartTag(start_tag) = tokenizer.next().unwrap() else { +            panic!("expected start tag"); +        }; +        assert_eq!( +            start_tag.attributes.get("empty").unwrap().value_syntax(), +            None +        ); +        assert_eq!( +            start_tag.attributes.get("unquoted").unwrap().value_syntax(), +            Some(AttrValueSyntax::Unquoted) +        ); +        assert_eq!( +            start_tag +                .attributes +                .get("single-quoted") +                .unwrap() +                .value_syntax(), +            Some(AttrValueSyntax::SingleQuoted) +        ); +        assert_eq!( +            start_tag +                .attributes +                .get("double-quoted") +                .unwrap() +                .value_syntax(), +            Some(AttrValueSyntax::DoubleQuoted) +        ); +    } +} diff --git a/src/machine.rs b/src/machine.rs index a58a754..ccd3052 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -1,3 +1,4 @@ +use crate::attr::AttrValueSyntax;  use crate::entities::try_read_character_reference;  use crate::offset::{Offset, Position};  use crate::utils::{ @@ -757,13 +758,13 @@ where              Some(whitespace_pat!()) => Ok(ControlToken::Continue),              Some('"') => {                  slf.emitter -                    .init_attribute_value(slf.reader.position(), true); +                    .init_attribute_value(AttrValueSyntax::DoubleQuoted, slf.reader.position());                  slf.state = State::AttributeValueDoubleQuoted;                  Ok(ControlToken::Continue)              }              Some('\'') => {                  slf.emitter -                    .init_attribute_value(slf.reader.position(), true); +                    .init_attribute_value(AttrValueSyntax::SingleQuoted, slf.reader.position());                  slf.state = State::AttributeValueSingleQuoted;                  Ok(ControlToken::Continue)              } @@ -775,7 +776,7 @@ where              }              c => {                  slf.emitter -                    .init_attribute_value(slf.reader.position() - 1, false); +                    .init_attribute_value(AttrValueSyntax::Unquoted, slf.reader.position() - 1);                  slf.state = State::AttributeValueUnquoted;                  slf.unread_char(c);                  Ok(ControlToken::Continue) | 
