summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2023-08-16 09:45:18 +0200
committer Martin Fischer <martin@push-f.com> 2023-08-19 13:41:55 +0200
commit 65aca9cbf0318bd3a2f936641b4f5bc3729c98c2 (patch)
tree 3bf6ae3ea03c1f377e9b4bd6fd7d6af99fbddc5a
parent 0f460c2e77f450a2bac68eec97b2c62aa33c0495 (diff)
break!: introduce AttributeMap
This has a number of benefits:

* it hides the implementation of the map

* it hides the type used for the map values
  (which lets us e.g. change name_span to name_offset while still
  being able to provide a convenient `Attribute::name_span` method.)

* it lets us provide convenience impls for the map
  such as `FromIterator<(String, String)>`
-rw-r--r-- integration_tests/tests/test_html5lib.rs | 2
-rw-r--r-- src/attr.rs | 127
-rw-r--r-- src/emitter.rs | 9
-rw-r--r-- src/lib.rs | 3
-rw-r--r-- tests/test_spans.rs | 8
5 files changed, 129 insertions, 20 deletions
diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs
index 0f96063..a624c30 100644
--- a/integration_tests/tests/test_html5lib.rs
+++ b/integration_tests/tests/test_html5lib.rs
@@ -129,7 +129,7 @@ fn run_test_inner<R: Reader>(
attributes: tag
.attributes
.into_iter()
- .map(|(name, map_val)| (name, map_val.value))
+ .map(|attr| (attr.name().to_owned(), attr.value().to_owned()))
.collect(),
self_closing: tag.self_closing,
}),
diff --git a/src/attr.rs b/src/attr.rs
index d0d506e..9e4c984 100644
--- a/src/attr.rs
+++ b/src/attr.rs
@@ -1,14 +1,125 @@
-use std::ops::Range;
+//! Types for HTML attributes.
-/// A HTML attribute value (plus spans).
+use std::collections::{btree_map, BTreeMap};
+use std::iter::FromIterator;
+use std::ops::{Index, Range};
+
+use crate::offset::Offset;
+
+/// A map of HTML attributes.
+///
+/// Does not preserve the order of attributes.
+/// Iterating always yields attributes in order by name.
+///
+/// # Example
+///
+/// ```
+/// # use html5tokenizer::attr::AttributeMap;
+/// let attrs: AttributeMap<()> = vec![("href".into(), "http://example.com".into())]
+/// .into_iter()
+/// .collect();
+/// assert_eq!(&attrs["href"], "http://example.com");
+/// ```
+#[derive(Debug, Default, PartialEq, Eq)]
+pub struct AttributeMap<O> {
+ pub(crate) inner: BTreeMap<String, AttrInternal<O>>,
+}
+
+/// The value type internally used by the [`AttributeMap`].
+/// Not part of the public API.
#[derive(Debug, Eq, PartialEq)]
-pub struct Attribute<O> {
- /// The value of the attribute.
+pub(crate) struct AttrInternal<O> {
pub value: String,
-
- /// The source code span of the attribute name.
pub name_span: Range<O>,
-
- /// The source code span of the attribute value.
pub value_span: Range<O>,
}
+
+/// An HTML attribute borrowed from an [`AttributeMap`].
+#[derive(Debug, Eq, PartialEq)]
+pub struct Attribute<'a, O> {
+ name: &'a str,
+ map_val: &'a AttrInternal<O>,
+}
+
+impl<O> AttributeMap<O> {
+ /// Returns the attribute with the given name.
+ pub fn get(&self, name: &str) -> Option<Attribute<O>> {
+ self.inner
+ .get_key_value(name)
+ .map(|(name, map_val)| Attribute { name, map_val })
+ }
+}
+
+impl<'a, O: Offset> Attribute<'a, O> {
+ /// Returns the attribute name.
+ pub fn name(&self) -> &'a str {
+ self.name
+ }
+
+ /// Returns the attribute value.
+ pub fn value(&self) -> &'a str {
+ &self.map_val.value
+ }
+
+ /// Returns the span of the attribute name.
+ pub fn name_span(&self) -> Range<O> {
+ self.map_val.name_span.clone()
+ }
+
+ /// Returns the span of the attribute value.
+ pub fn value_span(&self) -> Range<O> {
+ self.map_val.value_span.clone()
+ }
+}
+
+// We cannot impl Index<Output=Attribute> because Index::index returns a reference of
+// the Output type (and you cannot return a value referencing a temporary value).
+impl<O> Index<&str> for AttributeMap<O> {
+ type Output = str;
+
+ fn index(&self, name: &str) -> &Self::Output {
+ &self.inner[name].value
+ }
+}
+
+impl<'a, O> IntoIterator for &'a AttributeMap<O> {
+ type Item = Attribute<'a, O>;
+
+ type IntoIter = AttrIter<'a, O>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ AttrIter(self.inner.iter())
+ }
+}
+
+/// A borrowed iterator over the attributes of an [`AttributeMap`].
+pub struct AttrIter<'a, S>(btree_map::Iter<'a, String, AttrInternal<S>>);
+
+impl<'a, S> Iterator for AttrIter<'a, S> {
+ type Item = Attribute<'a, S>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let (name, map_val) = self.0.next()?;
+ Some(Attribute { name, map_val })
+ }
+}
+
+impl<O: Default> FromIterator<(String, String)> for AttributeMap<O> {
+ fn from_iter<T: IntoIterator<Item = (String, String)>>(iter: T) -> Self {
+ Self {
+ inner: iter
+ .into_iter()
+ .map(|(name, value)| {
+ (
+ name,
+ AttrInternal {
+ value,
+ name_span: O::default()..O::default(),
+ value_span: O::default()..O::default(),
+ },
+ )
+ })
+ .collect(),
+ }
+ }
+}
diff --git a/src/emitter.rs b/src/emitter.rs
index d3258e2..8856589 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -1,5 +1,4 @@
use std::collections::btree_map::Entry;
-use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::VecDeque;
use std::mem;
@@ -179,7 +178,7 @@ pub trait Emitter<O> {
pub struct DefaultEmitter<O = NoopOffset> {
current_characters: String,
current_token: Option<Token<O>>,
- current_attribute: Option<(String, crate::attr::Attribute<O>)>,
+ current_attribute: Option<(String, crate::attr::AttrInternal<O>)>,
seen_attributes: BTreeSet<String>,
emitted_tokens: VecDeque<Token<O>>,
attr_in_end_tag_span: Option<Range<O>>,
@@ -210,7 +209,7 @@ impl<O> DefaultEmitter<O> {
{
if let Some((k, v)) = self.current_attribute.take() {
match self.current_token {
- Some(Token::StartTag(ref mut tag)) => match tag.attributes.entry(k) {
+ Some(Token::StartTag(ref mut tag)) => match tag.attributes.inner.entry(k) {
Entry::Vacant(vacant) => {
vacant.insert(v);
}
@@ -380,7 +379,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
self.flush_current_attribute();
self.current_attribute = Some((
String::new(),
- crate::attr::Attribute {
+ crate::attr::AttrInternal {
name_span: offset..offset,
value: String::new(),
value_span: Range::default(),
@@ -461,7 +460,7 @@ pub struct StartTag<O> {
///
/// Duplicate attributes are ignored after the first one as per WHATWG spec. Implement your own
/// [`Emitter`] to tweak this behavior.
- pub attributes: BTreeMap<String, crate::attr::Attribute<O>>,
+ pub attributes: crate::attr::AttributeMap<O>,
/// The source code span of the tag.
pub span: Range<O>,
diff --git a/src/lib.rs b/src/lib.rs
index 4f2cf9c..cbaf94d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,7 +3,7 @@
#![forbid(unsafe_code)]
#![doc = include_str!("../README.md")]
-mod attr;
+pub mod attr;
mod emitter;
mod entities;
mod error;
@@ -16,7 +16,6 @@ mod utils;
#[cfg(feature = "integration-tests")]
pub use utils::State as InternalState;
-pub use attr::Attribute;
pub use emitter::{Comment, DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token};
pub use error::Error;
pub use tokenizer::{State, Tokenizer};
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
index 33f5d11..99ff9ee 100644
--- a/tests/test_spans.rs
+++ b/tests/test_spans.rs
@@ -110,8 +110,8 @@ fn attribute_name_span() {
let Token::StartTag(tag) = tokenizer(html).next().unwrap() else {
panic!("expected start tag")
};
- for (_name, attr) in tag.attributes {
- labels.push((attr.name_span, ""));
+ for attr in &tag.attributes {
+ labels.push((attr.name_span(), ""));
}
assert_snapshot!(annotate(html, labels), @r###"
<test x xyz y=VAL xy=VAL z = VAL yzx = VAL>
@@ -126,8 +126,8 @@ fn attribute_value_span() {
let Token::StartTag(tag) = tokenizer(html).next().unwrap() else {
panic!("expected start tag")
};
- for (_name, attr) in tag.attributes {
- labels.push((attr.value_span, ""));
+ for attr in &tag.attributes {
+ labels.push((attr.value_span(), ""));
}
assert_snapshot!(annotate(html, labels), @r###"
<test x=unquoted y = unquoted z='single-quoted' zz="double-quoted" empty=''>