diff options
| | | |
|---|---|---|
| author | Martin Fischer <martin@push-f.com> | 2023-08-16 09:45:18 +0200 |
| committer | Martin Fischer <martin@push-f.com> | 2023-08-19 13:41:55 +0200 |
| commit | 65aca9cbf0318bd3a2f936641b4f5bc3729c98c2 (patch) | |
| tree | 3bf6ae3ea03c1f377e9b4bd6fd7d6af99fbddc5a | |
| parent | 0f460c2e77f450a2bac68eec97b2c62aa33c0495 (diff) | |
break!: introduce AttributeMap

This has a number of benefits:
* it hides the implementation of the map
* it hides the type used for the map values
  (which lets us, e.g., change name_span to name_offset while still
  being able to provide a convenient `Attribute::name_span` method)
* it lets us provide convenience impls for the map
  such as `FromIterator<(String, String)>`
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | integration_tests/tests/test_html5lib.rs | 2 |
| -rw-r--r-- | src/attr.rs | 127 |
| -rw-r--r-- | src/emitter.rs | 9 |
| -rw-r--r-- | src/lib.rs | 3 |
| -rw-r--r-- | tests/test_spans.rs | 8 |
5 files changed, 129 insertions, 20 deletions
diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs index 0f96063..a624c30 100644 --- a/integration_tests/tests/test_html5lib.rs +++ b/integration_tests/tests/test_html5lib.rs @@ -129,7 +129,7 @@ fn run_test_inner<R: Reader>(                  attributes: tag                      .attributes                      .into_iter() -                    .map(|(name, map_val)| (name, map_val.value)) +                    .map(|attr| (attr.name().to_owned(), attr.value().to_owned()))                      .collect(),                  self_closing: tag.self_closing,              }), diff --git a/src/attr.rs b/src/attr.rs index d0d506e..9e4c984 100644 --- a/src/attr.rs +++ b/src/attr.rs @@ -1,14 +1,125 @@ -use std::ops::Range; +//! Types for HTML attributes. -/// A HTML attribute value (plus spans). +use std::collections::{btree_map, BTreeMap}; +use std::iter::FromIterator; +use std::ops::{Index, Range}; + +use crate::offset::Offset; + +/// A map of HTML attributes. +/// +/// Does not preserve the order of attributes. +/// Iterating always yields attributes in order by name. +/// +/// # Example +/// +/// ``` +/// # use html5tokenizer::attr::AttributeMap; +/// let attrs: AttributeMap<()> = vec![("href".into(), "http://example.com".into())] +///     .into_iter() +///     .collect(); +/// assert_eq!(&attrs["href"], "http://example.com"); +/// ``` +#[derive(Debug, Default, PartialEq, Eq)] +pub struct AttributeMap<O> { +    pub(crate) inner: BTreeMap<String, AttrInternal<O>>, +} + +/// The value type internally used by the [`AttributeMap`]. +/// Not part of the public API.  #[derive(Debug, Eq, PartialEq)] -pub struct Attribute<O> { -    /// The value of the attribute. +pub(crate) struct AttrInternal<O> {      pub value: String, - -    /// The source code span of the attribute name.      pub name_span: Range<O>, - -    /// The source code span of the attribute value.      
pub value_span: Range<O>,  } + +/// An HTML attribute borrowed from an [`AttributeMap`]. +#[derive(Debug, Eq, PartialEq)] +pub struct Attribute<'a, O> { +    name: &'a str, +    map_val: &'a AttrInternal<O>, +} + +impl<O> AttributeMap<O> { +    /// Returns the attribute with the given name. +    pub fn get(&self, name: &str) -> Option<Attribute<O>> { +        self.inner +            .get_key_value(name) +            .map(|(name, map_val)| Attribute { name, map_val }) +    } +} + +impl<'a, O: Offset> Attribute<'a, O> { +    /// Returns the attribute name. +    pub fn name(&self) -> &'a str { +        self.name +    } + +    /// Returns the attribute value. +    pub fn value(&self) -> &'a str { +        &self.map_val.value +    } + +    /// Returns the span of the attribute name. +    pub fn name_span(&self) -> Range<O> { +        self.map_val.name_span.clone() +    } + +    /// Returns the span of the attribute value. +    pub fn value_span(&self) -> Range<O> { +        self.map_val.value_span.clone() +    } +} + +// We cannot impl Index<Output=Attribute> because Index::index returns a reference of +// the Output type (and you cannot return a value referencing a temporary value). +impl<O> Index<&str> for AttributeMap<O> { +    type Output = str; + +    fn index(&self, name: &str) -> &Self::Output { +        &self.inner[name].value +    } +} + +impl<'a, O> IntoIterator for &'a AttributeMap<O> { +    type Item = Attribute<'a, O>; + +    type IntoIter = AttrIter<'a, O>; + +    fn into_iter(self) -> Self::IntoIter { +        AttrIter(self.inner.iter()) +    } +} + +/// A borrowed iterator over the attributes of an [`AttributeMap`]. 
+pub struct AttrIter<'a, S>(btree_map::Iter<'a, String, AttrInternal<S>>); + +impl<'a, S> Iterator for AttrIter<'a, S> { +    type Item = Attribute<'a, S>; + +    fn next(&mut self) -> Option<Self::Item> { +        let (name, map_val) = self.0.next()?; +        Some(Attribute { name, map_val }) +    } +} + +impl<O: Default> FromIterator<(String, String)> for AttributeMap<O> { +    fn from_iter<T: IntoIterator<Item = (String, String)>>(iter: T) -> Self { +        Self { +            inner: iter +                .into_iter() +                .map(|(name, value)| { +                    ( +                        name, +                        AttrInternal { +                            value, +                            name_span: O::default()..O::default(), +                            value_span: O::default()..O::default(), +                        }, +                    ) +                }) +                .collect(), +        } +    } +} diff --git a/src/emitter.rs b/src/emitter.rs index d3258e2..8856589 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -1,5 +1,4 @@  use std::collections::btree_map::Entry; -use std::collections::BTreeMap;  use std::collections::BTreeSet;  use std::collections::VecDeque;  use std::mem; @@ -179,7 +178,7 @@ pub trait Emitter<O> {  pub struct DefaultEmitter<O = NoopOffset> {      current_characters: String,      current_token: Option<Token<O>>, -    current_attribute: Option<(String, crate::attr::Attribute<O>)>, +    current_attribute: Option<(String, crate::attr::AttrInternal<O>)>,      seen_attributes: BTreeSet<String>,      emitted_tokens: VecDeque<Token<O>>,      attr_in_end_tag_span: Option<Range<O>>, @@ -210,7 +209,7 @@ impl<O> DefaultEmitter<O> {      {          if let Some((k, v)) = self.current_attribute.take() {              match self.current_token { -                Some(Token::StartTag(ref mut tag)) => match tag.attributes.entry(k) { +                Some(Token::StartTag(ref mut tag)) => match 
tag.attributes.inner.entry(k) {                      Entry::Vacant(vacant) => {                          vacant.insert(v);                      } @@ -380,7 +379,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {          self.flush_current_attribute();          self.current_attribute = Some((              String::new(), -            crate::attr::Attribute { +            crate::attr::AttrInternal {                  name_span: offset..offset,                  value: String::new(),                  value_span: Range::default(), @@ -461,7 +460,7 @@ pub struct StartTag<O> {      ///      /// Duplicate attributes are ignored after the first one as per WHATWG spec. Implement your own      /// [`Emitter`] to tweak this behavior. -    pub attributes: BTreeMap<String, crate::attr::Attribute<O>>, +    pub attributes: crate::attr::AttributeMap<O>,      /// The source code span of the tag.      pub span: Range<O>, @@ -3,7 +3,7 @@  #![forbid(unsafe_code)]  #![doc = include_str!("../README.md")] -mod attr; +pub mod attr;  mod emitter;  mod entities;  mod error; @@ -16,7 +16,6 @@ mod utils;  #[cfg(feature = "integration-tests")]  pub use utils::State as InternalState; -pub use attr::Attribute;  pub use emitter::{Comment, DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token};  pub use error::Error;  pub use tokenizer::{State, Tokenizer}; diff --git a/tests/test_spans.rs b/tests/test_spans.rs index 33f5d11..99ff9ee 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -110,8 +110,8 @@ fn attribute_name_span() {      let Token::StartTag(tag) = tokenizer(html).next().unwrap() else {          panic!("expected start tag")      }; -    for (_name, attr) in tag.attributes { -        labels.push((attr.name_span, "")); +    for attr in &tag.attributes { +        labels.push((attr.name_span(), ""));      }      assert_snapshot!(annotate(html, labels), @r###"      <test x xyz y=VAL xy=VAL z = VAL yzx = VAL> @@ -126,8 +126,8 @@ fn attribute_value_span() {      let 
Token::StartTag(tag) = tokenizer(html).next().unwrap() else {          panic!("expected start tag")      }; -    for (_name, attr) in tag.attributes { -        labels.push((attr.value_span, "")); +    for attr in &tag.attributes { +        labels.push((attr.value_span(), ""));      }      assert_snapshot!(annotate(html, labels), @r###"      <test x=unquoted y = unquoted z='single-quoted' zz="double-quoted" empty=''>  | 
