summary | refs | log | tree | commit | diff
path: root/src/token.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/token.rs')
-rw-r--r-- src/token.rs 217
1 files changed, 71 insertions, 146 deletions
diff --git a/src/token.rs b/src/token.rs
index ed8c8c8..4f3c0ce 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -2,32 +2,30 @@
use std::collections::{btree_map, BTreeMap};
use std::iter::FromIterator;
-use std::ops::{Index, Range};
-
-use crate::offset::Offset;
+use std::ops::Index;
/// A type for the tokens emitted by a WHATWG-compliant HTML tokenizer.
#[derive(Debug, Eq, PartialEq)]
-pub enum Token<O> {
+pub enum Token {
/// A literal character, a resolved character reference,
/// or part of a resolved character reference (since some
/// character references resolve to two `char`s).
Char(char),
/// An HTML start tag.
- StartTag(StartTag<O>),
+ StartTag(StartTag),
/// An HTML end tag.
- EndTag(EndTag<O>),
+ EndTag(EndTag),
/// An HTML comment.
- Comment(Comment<O>),
+ Comment(String),
/// An HTML doctype declaration.
- Doctype(Doctype<O>),
+ Doctype(Doctype),
/// An end-of-file token.
EndOfFile,
}
/// An HTML start tag, such as `<p>` or `<a>`.
#[derive(Debug, Eq, PartialEq)]
-pub struct StartTag<O> {
+pub struct StartTag {
/// Whether this tag is self-closing. If it is self-closing, no following [`EndTag`] should be
/// expected.
pub self_closing: bool,
@@ -39,43 +37,15 @@ pub struct StartTag<O> {
/// A mapping for any HTML attributes this start tag may have.
///
/// Duplicate attributes are ignored after the first one as per WHATWG spec.
- pub attributes: AttributeMap<O>,
-
- /// The source code span of the tag.
- pub span: Range<O>,
-
- /// The span of the tag name.
- pub name_span: Range<O>,
+ pub attributes: AttributeMap,
}
/// An HTML end/close tag, such as `</p>` or `</a>`.
#[derive(Debug, Eq, PartialEq)]
-pub struct EndTag<O> {
+pub struct EndTag {
/// The tag name.
/// Uppercase ASCII characters (A-Z) have been converted to lowercase.
pub name: String,
-
- /// The source code span of the tag.
- pub span: Range<O>,
-
- /// The span of the tag name.
- pub name_span: Range<O>,
-}
-
-/// An HTML comment.
-#[derive(PartialEq, Eq, Debug)]
-pub struct Comment<O> {
- /// The text within the comment.
- pub data: String,
- /// The source offset of the comment data.
- pub data_span: Range<O>,
-}
-
-impl<O: Offset> Comment<O> {
- /// Returns the span for the comment data.
- pub fn data_span(&self) -> Range<O> {
- self.data_span.clone()
- }
}
/// A doctype. Some examples:
@@ -85,7 +55,7 @@ impl<O: Offset> Comment<O> {
/// * `<!DOCTYPE {name} SYSTEM '{system_id}'>`
/// * `<!DOCTYPE {name} PUBLIC '{public_id}' '{system_id}'>`
#[derive(Debug, Eq, PartialEq)]
-pub struct Doctype<O> {
+pub struct Doctype {
/// The [force-quirks flag].
///
/// [force-quirks flag]: https://html.spec.whatwg.org/multipage/parsing.html#force-quirks-flag
@@ -100,38 +70,6 @@ pub struct Doctype<O> {
/// The doctype's system identifier.
pub system_id: Option<String>,
-
- /// The source code span of the doctype.
- pub span: Range<O>,
-
- /// The span of the name.
- pub(crate) name_span: Range<O>,
-
- /// The span of the public identifier.
- pub(crate) public_id_span: Range<O>,
-
- /// The span of the system identifier.
- pub(crate) system_id_span: Range<O>,
-}
-
-impl<O: Offset> Doctype<O> {
- /// Returns the span of the name.
- pub fn name_span(&self) -> Option<Range<O>> {
- self.name.as_ref()?;
- Some(self.name_span.clone())
- }
-
- /// Returns the span of the public identifier.
- pub fn public_id_span(&self) -> Option<Range<O>> {
- self.public_id.as_ref()?;
- Some(self.public_id_span.clone())
- }
-
- /// Returns the span of the system identifier.
- pub fn system_id_span(&self) -> Option<Range<O>> {
- self.system_id.as_ref()?;
- Some(self.system_id_span.clone())
- }
}
/// A map of HTML attributes.
@@ -143,79 +81,81 @@ impl<O: Offset> Doctype<O> {
///
/// ```
/// # use html5tokenizer::attr::AttributeMap;
-/// let attrs: AttributeMap<()> = vec![("href".into(), "http://example.com".into())]
+/// let attrs: AttributeMap = vec![("href".into(), "http://example.com".into())]
/// .into_iter()
/// .collect();
/// assert_eq!(&attrs["href"], "http://example.com");
/// ```
#[derive(Debug, Default, PartialEq, Eq)]
-pub struct AttributeMap<O> {
- pub(crate) inner: BTreeMap<String, AttrInternal<O>>,
+pub struct AttributeMap {
+ pub(crate) inner: BTreeMap<String, AttrInternal>,
}
/// The value type internally used by the [`AttributeMap`].
/// Not part of the public API.
-#[derive(Default, Debug, Eq, PartialEq)]
-pub(crate) struct AttrInternal<O> {
+#[derive(Default, Debug, Eq)]
+pub(crate) struct AttrInternal {
pub value: String,
- /// The span of the attribute name.
- pub name_span: Range<O>,
- /// The span of the attribute value.
- /// For the empty attribute syntax this is just `O::default()..O::default()`.
- /// We intentionally don't use `Option<Range<O>>` here to spare us a byte (and padding) per attribute.
- pub value_span: Range<O>,
- pub value_syntax: Option<AttrValueSyntax>,
+ pub trace_idx: Option<AttributeTraceIdx>,
}
-/// The syntax of the attribute value.
-#[derive(Clone, Copy, PartialEq, Eq, Debug)]
-pub enum AttrValueSyntax {
- /// An unquoted attribute value, e.g. `id=foo`.
- Unquoted,
- /// A single-quoted attribute value, e.g. `id='foo'`.
- SingleQuoted,
- /// A double-quoted attribute value, e.g. `id="foo"`.
- DoubleQuoted,
+/// The index of an [`AttributeTrace`] within an [`AttributeTraceList`].
+///
+/// [`AttributeTrace`]: crate::trace::AttributeTrace
+/// [`AttributeTraceList`]: crate::trace::AttributeTraceList
+#[derive(Clone, Copy, Eq, PartialEq, Debug)]
+pub struct AttributeTraceIdx(
+ // Using NonZeroUsize so that `Option<AttributeTraceIdx>`
+ // has the same size as `AttributeTraceIdx`.
+ pub std::num::NonZeroUsize,
+);
+
+impl PartialEq for AttrInternal {
+ fn eq(&self, other: &Self) -> bool {
+ // We intentionally don't include the trace_idx,
+ // so that PartialEq of Token only compares semantics.
+ self.value == other.value
+ }
}
/// An HTML attribute borrowed from an [`AttributeMap`].
#[derive(Debug, Eq, PartialEq)]
-pub struct Attribute<'a, O> {
+pub struct Attribute<'a> {
name: &'a str,
- map_val: &'a AttrInternal<O>,
+ map_val: &'a AttrInternal,
}
/// An owned HTML attribute.
#[derive(Debug, PartialEq, Eq)]
-pub struct AttributeOwned<O> {
+pub struct AttributeOwned {
/// The attribute name.
/// Uppercase ASCII characters (A-Z) have been converted to lowercase.
pub name: String,
/// The attribute value. Character references have been resolved.
pub value: String,
- /// The span of the attribute name.
- pub name_span: Range<O>,
- /// The span of the attribute value.
- /// `None` in case of the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
- pub value_span: Option<Range<O>>,
- /// The syntax of the attribute value.
- /// `None` indicates the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
- pub value_syntax: Option<AttrValueSyntax>,
+ /// The index of the corresponding [`AttributeTrace`] in the
+ /// `attribute_traces` field of [`StartTagTrace`], in case this attribute
+ /// was present in the source and the [`Emitter`] has tracked this.
+ ///
+ /// [`AttributeTrace`]: super::trace::AttributeTrace
+ /// [`StartTagTrace`]: super::trace::StartTagTrace
+ /// [`Emitter`]: super::Emitter
+ pub trace_idx: Option<AttributeTraceIdx>,
}
-impl<O> AttributeMap<O> {
+impl AttributeMap {
/// Returns the attribute with the given name.
///
/// The name must not contain any uppercase ASCII character (A-Z)
/// or the method will always return `None`.
- pub fn get(&self, name: &str) -> Option<Attribute<O>> {
+ pub fn get(&self, name: &str) -> Option<Attribute> {
self.inner
.get_key_value(name)
.map(|(name, map_val)| Attribute { name, map_val })
}
}
-impl<'a, O: Offset> Attribute<'a, O> {
+impl<'a> Attribute<'a> {
/// Returns the attribute name.
/// Uppercase ASCII characters (A-Z) have been converted to lowercase.
pub fn name(&self) -> &'a str {
@@ -227,32 +167,21 @@ impl<'a, O: Offset> Attribute<'a, O> {
&self.map_val.value
}
- /// Returns the span of the attribute name.
- pub fn name_span(&self) -> Range<O> {
- self.map_val.name_span.clone()
- }
-
- /// For explicitly defined values returns the span of the attribute value.
- ///
- /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
- pub fn value_span(&self) -> Option<Range<O>> {
- if self.map_val.value_syntax.is_none() {
- return None;
- }
- Some(self.map_val.value_span.clone())
- }
-
- /// Returns the attribute value syntax in case the value is explicitly defined.
+ /// Returns the index of the corresponding [`AttributeTrace`] in the
+ /// `attribute_traces` field of [`StartTagTrace`], in case this attribute
+ /// was present in the source and the [`Emitter`] has tracked that.
///
- /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
- pub fn value_syntax(&self) -> Option<AttrValueSyntax> {
- self.map_val.value_syntax
+ /// [`AttributeTrace`]: super::trace::AttributeTrace
+ /// [`StartTagTrace`]: super::trace::StartTagTrace
+ /// [`Emitter`]: super::Emitter
+ pub fn trace_idx(&self) -> Option<AttributeTraceIdx> {
+ self.map_val.trace_idx
}
}
// We cannot impl Index<Output=Attribute> because Index::index returns a reference of
// the Output type (and you cannot return a value referencing a temporary value).
-impl<O> Index<&str> for AttributeMap<O> {
+impl Index<&str> for AttributeMap {
type Output = str;
/// Returns the attribute value with the given name.
@@ -264,10 +193,10 @@ impl<O> Index<&str> for AttributeMap<O> {
}
}
-impl<O> IntoIterator for AttributeMap<O> {
- type Item = AttributeOwned<O>;
+impl IntoIterator for AttributeMap {
+ type Item = AttributeOwned;
- type IntoIter = AttrIntoIter<O>;
+ type IntoIter = AttrIntoIter;
fn into_iter(self) -> Self::IntoIter {
AttrIntoIter(self.inner.into_iter())
@@ -275,27 +204,25 @@ impl<O> IntoIterator for AttributeMap<O> {
}
/// A consuming iterator over the attributes of an [`AttributeMap`].
-pub struct AttrIntoIter<O>(btree_map::IntoIter<String, AttrInternal<O>>);
+pub struct AttrIntoIter(btree_map::IntoIter<String, AttrInternal>);
-impl<O> Iterator for AttrIntoIter<O> {
- type Item = AttributeOwned<O>;
+impl Iterator for AttrIntoIter {
+ type Item = AttributeOwned;
fn next(&mut self) -> Option<Self::Item> {
let (name, map_val) = self.0.next()?;
Some(AttributeOwned {
name,
value: map_val.value,
- name_span: map_val.name_span,
- value_span: map_val.value_syntax.is_some().then_some(map_val.value_span),
- value_syntax: map_val.value_syntax,
+ trace_idx: map_val.trace_idx,
})
}
}
-impl<'a, O> IntoIterator for &'a AttributeMap<O> {
- type Item = Attribute<'a, O>;
+impl<'a> IntoIterator for &'a AttributeMap {
+ type Item = Attribute<'a>;
- type IntoIter = AttrIter<'a, O>;
+ type IntoIter = AttrIter<'a>;
fn into_iter(self) -> Self::IntoIter {
AttrIter(self.inner.iter())
@@ -303,10 +230,10 @@ impl<'a, O> IntoIterator for &'a AttributeMap<O> {
}
/// A borrowed iterator over the attributes of an [`AttributeMap`].
-pub struct AttrIter<'a, S>(btree_map::Iter<'a, String, AttrInternal<S>>);
+pub struct AttrIter<'a>(btree_map::Iter<'a, String, AttrInternal>);
-impl<'a, S> Iterator for AttrIter<'a, S> {
- type Item = Attribute<'a, S>;
+impl<'a> Iterator for AttrIter<'a> {
+ type Item = Attribute<'a>;
fn next(&mut self) -> Option<Self::Item> {
let (name, map_val) = self.0.next()?;
@@ -314,7 +241,7 @@ impl<'a, S> Iterator for AttrIter<'a, S> {
}
}
-impl<O: Default> FromIterator<(String, String)> for AttributeMap<O> {
+impl FromIterator<(String, String)> for AttributeMap {
fn from_iter<T: IntoIterator<Item = (String, String)>>(iter: T) -> Self {
Self {
inner: iter
@@ -324,9 +251,7 @@ impl<O: Default> FromIterator<(String, String)> for AttributeMap<O> {
name,
AttrInternal {
value,
- name_span: O::default()..O::default(),
- value_span: O::default()..O::default(),
- value_syntax: Some(AttrValueSyntax::DoubleQuoted),
+ trace_idx: None,
},
)
})