diff options
author | Martin Fischer <martin@push-f.com> | 2023-08-16 09:45:18 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-08-19 13:41:55 +0200 |
commit | 65aca9cbf0318bd3a2f936641b4f5bc3729c98c2 (patch) | |
tree | 3bf6ae3ea03c1f377e9b4bd6fd7d6af99fbddc5a /src | |
parent | 0f460c2e77f450a2bac68eec97b2c62aa33c0495 (diff) |
break!: introduce AttributeMap
This has a number of benefits:
* it hides the implementation of the map
* it hides the type used for the map values
  (which lets us, e.g., change `name_span` to `name_offset` while still
  being able to provide a convenient `Attribute::name_span` method)
* it lets us provide convenience impls for the map
such as `FromIterator<(String, String)>`
Diffstat (limited to 'src')
-rw-r--r-- | src/attr.rs | 127 | ||||
-rw-r--r-- | src/emitter.rs | 9 | ||||
-rw-r--r-- | src/lib.rs | 3 |
3 files changed, 124 insertions, 15 deletions
diff --git a/src/attr.rs b/src/attr.rs index d0d506e..9e4c984 100644 --- a/src/attr.rs +++ b/src/attr.rs @@ -1,14 +1,125 @@ -use std::ops::Range; +//! Types for HTML attributes. -/// A HTML attribute value (plus spans). +use std::collections::{btree_map, BTreeMap}; +use std::iter::FromIterator; +use std::ops::{Index, Range}; + +use crate::offset::Offset; + +/// A map of HTML attributes. +/// +/// Does not preserve the order of attributes. +/// Iterating always yields attributes in order by name. +/// +/// # Example +/// +/// ``` +/// # use html5tokenizer::attr::AttributeMap; +/// let attrs: AttributeMap<()> = vec![("href".into(), "http://example.com".into())] +/// .into_iter() +/// .collect(); +/// assert_eq!(&attrs["href"], "http://example.com"); +/// ``` +#[derive(Debug, Default, PartialEq, Eq)] +pub struct AttributeMap<O> { + pub(crate) inner: BTreeMap<String, AttrInternal<O>>, +} + +/// The value type internally used by the [`AttributeMap`]. +/// Not part of the public API. #[derive(Debug, Eq, PartialEq)] -pub struct Attribute<O> { - /// The value of the attribute. +pub(crate) struct AttrInternal<O> { pub value: String, - - /// The source code span of the attribute name. pub name_span: Range<O>, - - /// The source code span of the attribute value. pub value_span: Range<O>, } + +/// An HTML attribute borrowed from an [`AttributeMap`]. +#[derive(Debug, Eq, PartialEq)] +pub struct Attribute<'a, O> { + name: &'a str, + map_val: &'a AttrInternal<O>, +} + +impl<O> AttributeMap<O> { + /// Returns the attribute with the given name. + pub fn get(&self, name: &str) -> Option<Attribute<O>> { + self.inner + .get_key_value(name) + .map(|(name, map_val)| Attribute { name, map_val }) + } +} + +impl<'a, O: Offset> Attribute<'a, O> { + /// Returns the attribute name. + pub fn name(&self) -> &'a str { + self.name + } + + /// Returns the attribute value. + pub fn value(&self) -> &'a str { + &self.map_val.value + } + + /// Returns the span of the attribute name. 
+ pub fn name_span(&self) -> Range<O> { + self.map_val.name_span.clone() + } + + /// Returns the span of the attribute value. + pub fn value_span(&self) -> Range<O> { + self.map_val.value_span.clone() + } +} + +// We cannot impl Index<Output=Attribute> because Index::index returns a reference of +// the Output type (and you cannot return a value referencing a temporary value). +impl<O> Index<&str> for AttributeMap<O> { + type Output = str; + + fn index(&self, name: &str) -> &Self::Output { + &self.inner[name].value + } +} + +impl<'a, O> IntoIterator for &'a AttributeMap<O> { + type Item = Attribute<'a, O>; + + type IntoIter = AttrIter<'a, O>; + + fn into_iter(self) -> Self::IntoIter { + AttrIter(self.inner.iter()) + } +} + +/// A borrowed iterator over the attributes of an [`AttributeMap`]. +pub struct AttrIter<'a, S>(btree_map::Iter<'a, String, AttrInternal<S>>); + +impl<'a, S> Iterator for AttrIter<'a, S> { + type Item = Attribute<'a, S>; + + fn next(&mut self) -> Option<Self::Item> { + let (name, map_val) = self.0.next()?; + Some(Attribute { name, map_val }) + } +} + +impl<O: Default> FromIterator<(String, String)> for AttributeMap<O> { + fn from_iter<T: IntoIterator<Item = (String, String)>>(iter: T) -> Self { + Self { + inner: iter + .into_iter() + .map(|(name, value)| { + ( + name, + AttrInternal { + value, + name_span: O::default()..O::default(), + value_span: O::default()..O::default(), + }, + ) + }) + .collect(), + } + } +} diff --git a/src/emitter.rs b/src/emitter.rs index d3258e2..8856589 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -1,5 +1,4 @@ use std::collections::btree_map::Entry; -use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::VecDeque; use std::mem; @@ -179,7 +178,7 @@ pub trait Emitter<O> { pub struct DefaultEmitter<O = NoopOffset> { current_characters: String, current_token: Option<Token<O>>, - current_attribute: Option<(String, crate::attr::Attribute<O>)>, + current_attribute: Option<(String, 
crate::attr::AttrInternal<O>)>, seen_attributes: BTreeSet<String>, emitted_tokens: VecDeque<Token<O>>, attr_in_end_tag_span: Option<Range<O>>, @@ -210,7 +209,7 @@ impl<O> DefaultEmitter<O> { { if let Some((k, v)) = self.current_attribute.take() { match self.current_token { - Some(Token::StartTag(ref mut tag)) => match tag.attributes.entry(k) { + Some(Token::StartTag(ref mut tag)) => match tag.attributes.inner.entry(k) { Entry::Vacant(vacant) => { vacant.insert(v); } @@ -380,7 +379,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { self.flush_current_attribute(); self.current_attribute = Some(( String::new(), - crate::attr::Attribute { + crate::attr::AttrInternal { name_span: offset..offset, value: String::new(), value_span: Range::default(), @@ -461,7 +460,7 @@ pub struct StartTag<O> { /// /// Duplicate attributes are ignored after the first one as per WHATWG spec. Implement your own /// [`Emitter`] to tweak this behavior. - pub attributes: BTreeMap<String, crate::attr::Attribute<O>>, + pub attributes: crate::attr::AttributeMap<O>, /// The source code span of the tag. pub span: Range<O>, @@ -3,7 +3,7 @@ #![forbid(unsafe_code)] #![doc = include_str!("../README.md")] -mod attr; +pub mod attr; mod emitter; mod entities; mod error; @@ -16,7 +16,6 @@ mod utils; #[cfg(feature = "integration-tests")] pub use utils::State as InternalState; -pub use attr::Attribute; pub use emitter::{Comment, DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token}; pub use error::Error; pub use tokenizer::{State, Tokenizer}; |