diff options
-rw-r--r-- | integration_tests/tests/test_html5lib.rs | 2 | ||||
-rw-r--r-- | src/attr.rs | 127 | ||||
-rw-r--r-- | src/emitter.rs | 9 | ||||
-rw-r--r-- | src/lib.rs | 3 | ||||
-rw-r--r-- | tests/test_spans.rs | 8 |
5 files changed, 129 insertions, 20 deletions
diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs index 0f96063..a624c30 100644 --- a/integration_tests/tests/test_html5lib.rs +++ b/integration_tests/tests/test_html5lib.rs @@ -129,7 +129,7 @@ fn run_test_inner<R: Reader>( attributes: tag .attributes .into_iter() - .map(|(name, map_val)| (name, map_val.value)) + .map(|attr| (attr.name().to_owned(), attr.value().to_owned())) .collect(), self_closing: tag.self_closing, }), diff --git a/src/attr.rs b/src/attr.rs index d0d506e..9e4c984 100644 --- a/src/attr.rs +++ b/src/attr.rs @@ -1,14 +1,125 @@ -use std::ops::Range; +//! Types for HTML attributes. -/// A HTML attribute value (plus spans). +use std::collections::{btree_map, BTreeMap}; +use std::iter::FromIterator; +use std::ops::{Index, Range}; + +use crate::offset::Offset; + +/// A map of HTML attributes. +/// +/// Does not preserve the order of attributes. +/// Iterating always yields attributes in order by name. +/// +/// # Example +/// +/// ``` +/// # use html5tokenizer::attr::AttributeMap; +/// let attrs: AttributeMap<()> = vec![("href".into(), "http://example.com".into())] +/// .into_iter() +/// .collect(); +/// assert_eq!(&attrs["href"], "http://example.com"); +/// ``` +#[derive(Debug, Default, PartialEq, Eq)] +pub struct AttributeMap<O> { + pub(crate) inner: BTreeMap<String, AttrInternal<O>>, +} + +/// The value type internally used by the [`AttributeMap`]. +/// Not part of the public API. #[derive(Debug, Eq, PartialEq)] -pub struct Attribute<O> { - /// The value of the attribute. +pub(crate) struct AttrInternal<O> { pub value: String, - - /// The source code span of the attribute name. pub name_span: Range<O>, - - /// The source code span of the attribute value. pub value_span: Range<O>, } + +/// An HTML attribute borrowed from an [`AttributeMap`]. +#[derive(Debug, Eq, PartialEq)] +pub struct Attribute<'a, O> { + name: &'a str, + map_val: &'a AttrInternal<O>, +} + +impl<O> AttributeMap<O> { + /// Returns the attribute with the given name. + pub fn get(&self, name: &str) -> Option<Attribute<O>> { + self.inner + .get_key_value(name) + .map(|(name, map_val)| Attribute { name, map_val }) + } +} + +impl<'a, O: Offset> Attribute<'a, O> { + /// Returns the attribute name. + pub fn name(&self) -> &'a str { + self.name + } + + /// Returns the attribute value. + pub fn value(&self) -> &'a str { + &self.map_val.value + } + + /// Returns the span of the attribute name. + pub fn name_span(&self) -> Range<O> { + self.map_val.name_span.clone() + } + + /// Returns the span of the attribute value. + pub fn value_span(&self) -> Range<O> { + self.map_val.value_span.clone() + } +} + +// We cannot impl Index<Output=Attribute> because Index::index returns a reference of +// the Output type (and you cannot return a value referencing a temporary value). +impl<O> Index<&str> for AttributeMap<O> { + type Output = str; + + fn index(&self, name: &str) -> &Self::Output { + &self.inner[name].value + } +} + +impl<'a, O> IntoIterator for &'a AttributeMap<O> { + type Item = Attribute<'a, O>; + + type IntoIter = AttrIter<'a, O>; + + fn into_iter(self) -> Self::IntoIter { + AttrIter(self.inner.iter()) + } +} + +/// A borrowed iterator over the attributes of an [`AttributeMap`]. +pub struct AttrIter<'a, S>(btree_map::Iter<'a, String, AttrInternal<S>>); + +impl<'a, S> Iterator for AttrIter<'a, S> { + type Item = Attribute<'a, S>; + + fn next(&mut self) -> Option<Self::Item> { + let (name, map_val) = self.0.next()?; + Some(Attribute { name, map_val }) + } +} + +impl<O: Default> FromIterator<(String, String)> for AttributeMap<O> { + fn from_iter<T: IntoIterator<Item = (String, String)>>(iter: T) -> Self { + Self { + inner: iter + .into_iter() + .map(|(name, value)| { + ( + name, + AttrInternal { + value, + name_span: O::default()..O::default(), + value_span: O::default()..O::default(), + }, + ) + }) + .collect(), + } + } +} diff --git a/src/emitter.rs b/src/emitter.rs index d3258e2..8856589 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -1,5 +1,4 @@ use std::collections::btree_map::Entry; -use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::VecDeque; use std::mem; @@ -179,7 +178,7 @@ pub trait Emitter<O> { pub struct DefaultEmitter<O = NoopOffset> { current_characters: String, current_token: Option<Token<O>>, - current_attribute: Option<(String, crate::attr::Attribute<O>)>, + current_attribute: Option<(String, crate::attr::AttrInternal<O>)>, seen_attributes: BTreeSet<String>, emitted_tokens: VecDeque<Token<O>>, attr_in_end_tag_span: Option<Range<O>>, @@ -210,7 +209,7 @@ impl<O> DefaultEmitter<O> { { if let Some((k, v)) = self.current_attribute.take() { match self.current_token { - Some(Token::StartTag(ref mut tag)) => match tag.attributes.entry(k) { + Some(Token::StartTag(ref mut tag)) => match tag.attributes.inner.entry(k) { Entry::Vacant(vacant) => { vacant.insert(v); } @@ -380,7 +379,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { self.flush_current_attribute(); self.current_attribute = Some(( String::new(), - crate::attr::Attribute { + crate::attr::AttrInternal { name_span: offset..offset, value: String::new(), value_span: Range::default(), @@ -461,7 +460,7 @@ pub struct StartTag<O> { /// /// Duplicate attributes are ignored after the first one as per WHATWG spec. Implement your own /// [`Emitter`] to tweak this behavior. - pub attributes: BTreeMap<String, crate::attr::Attribute<O>>, + pub attributes: crate::attr::AttributeMap<O>, /// The source code span of the tag. pub span: Range<O>, @@ -3,7 +3,7 @@ #![forbid(unsafe_code)] #![doc = include_str!("../README.md")] -mod attr; +pub mod attr; mod emitter; mod entities; mod error; @@ -16,7 +16,6 @@ mod utils; #[cfg(feature = "integration-tests")] pub use utils::State as InternalState; -pub use attr::Attribute; pub use emitter::{Comment, DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token}; pub use error::Error; pub use tokenizer::{State, Tokenizer}; diff --git a/tests/test_spans.rs b/tests/test_spans.rs index 33f5d11..99ff9ee 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -110,8 +110,8 @@ fn attribute_name_span() { let Token::StartTag(tag) = tokenizer(html).next().unwrap() else { panic!("expected start tag") }; - for (_name, attr) in tag.attributes { - labels.push((attr.name_span, "")); + for attr in &tag.attributes { + labels.push((attr.name_span(), "")); } assert_snapshot!(annotate(html, labels), @r###" <test x xyz y=VAL xy=VAL z = VAL yzx = VAL> @@ -126,8 +126,8 @@ fn attribute_value_span() { let Token::StartTag(tag) = tokenizer(html).next().unwrap() else { panic!("expected start tag") }; - for (_name, attr) in tag.attributes { - labels.push((attr.value_span, "")); + for attr in &tag.attributes { + labels.push((attr.value_span(), "")); } assert_snapshot!(annotate(html, labels), @r###" <test x=unquoted y = unquoted z='single-quoted' zz="double-quoted" empty=''> |