aboutsummaryrefslogtreecommitdiff
path: root/src/trace.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/trace.rs')
-rw-r--r--src/trace.rs241
1 files changed, 241 insertions, 0 deletions
diff --git a/src/trace.rs b/src/trace.rs
new file mode 100644
index 0000000..a816429
--- /dev/null
+++ b/src/trace.rs
@@ -0,0 +1,241 @@
+//! Provides the [`Trace`] type (byte offsets and syntax information about tokens).
+
+use std::{
+ num::NonZeroUsize,
+ ops::{Index, Range},
+};
+
+use crate::let_else::assume;
+use crate::token::AttributeTraceIdx;
+
+/// Provides byte offsets and syntax information about a [`Token`].
+///
+/// `StartTag`, `EndTag`, `Comment` and `Doctype` wrap the corresponding trace
+/// struct; `Char` and `EndOfFile` carry no additional data.
+///
+/// [`Token`]: crate::token::Token
+// The variants carry no individual doc comments, hence the lint exemption below.
+#[allow(missing_docs)]
+#[derive(Eq, PartialEq, Debug)]
+pub enum Trace {
+ Char,
+ StartTag(StartTagTrace),
+ EndTag(EndTagTrace),
+ Comment(CommentTrace),
+ Doctype(DoctypeTrace),
+ EndOfFile,
+}
+
+/// Provides byte offsets and syntax information for a [`StartTag`] token.
+///
+/// [`StartTag`]: crate::token::StartTag
+#[derive(Eq, PartialEq, Debug)]
+pub struct StartTagTrace {
+ /// The span of the tag, as a half-open range of byte offsets.
+ pub span: Range<usize>,
+
+ /// The span of the tag name, as a half-open range of byte offsets.
+ pub name_span: Range<usize>,
+
+ /// List of [`AttributeTrace`]s for the attributes that were present in the source.
+ ///
+ /// Individual traces are looked up by indexing the list with an `AttributeTraceIdx`.
+ pub attribute_traces: AttributeTraceList,
+}
+
+/// Provides byte offsets for an [`EndTag`] token.
+///
+/// [`EndTag`]: crate::token::EndTag
+#[derive(Eq, PartialEq, Debug)]
+pub struct EndTagTrace {
+ /// The span of the tag, as a half-open range of byte offsets.
+ pub span: Range<usize>,
+
+ /// The span of the tag name, as a half-open range of byte offsets.
+ pub name_span: Range<usize>,
+}
+
+/// Provides byte offsets for a [`Token::Comment`].
+///
+/// [`Token::Comment`]: crate::token::Token::Comment
+#[derive(Eq, PartialEq, Debug)]
+pub struct CommentTrace {
+ /// The span of the comment data, as a half-open range of byte offsets.
+ pub data_span: Range<usize>,
+}
+
+/// Provides byte offsets for a [`Doctype`] token.
+///
+/// None of the fields are part of the public API; the spans are read through
+/// the accessor methods of this type.
+///
+/// [`Doctype`]: crate::token::Doctype
+#[derive(Debug, PartialEq, Eq)]
+pub struct DoctypeTrace {
+    /// The span of the whole DOCTYPE.
+    pub(crate) span: Range<usize>,
+    // The optional spans use `NonZeroUsize` offsets to optimize the size of the struct.
+    /// The span of the name, if one was recorded.
+    name_span: Option<Range<NonZeroUsize>>,
+    /// The span of the public identifier, if one was recorded.
+    public_id_span: Option<Range<NonZeroUsize>>,
+    /// The span of the system identifier, if one was recorded.
+    system_id_span: Option<Range<NonZeroUsize>>,
+}
+
+impl DoctypeTrace {
+    /// Returns the span of the DOCTYPE.
+    pub fn span(&self) -> Range<usize> {
+        self.span.start..self.span.end
+    }
+
+    /// Returns the span of the name.
+    pub fn name_span(&self) -> Option<Range<usize>> {
+        Self::to_usize_span(&self.name_span)
+    }
+
+    /// Returns the span of the public identifier.
+    pub fn public_id_span(&self) -> Option<Range<usize>> {
+        Self::to_usize_span(&self.public_id_span)
+    }
+
+    /// Returns the span of the system identifier.
+    pub fn system_id_span(&self) -> Option<Range<usize>> {
+        Self::to_usize_span(&self.system_id_span)
+    }
+
+    /// Converts a stored `NonZeroUsize` span into a plain `usize` span.
+    fn to_usize_span(stored: &Option<Range<NonZeroUsize>>) -> Option<Range<usize>> {
+        let Range { start, end } = stored.as_ref()?;
+        Some(start.get()..end.get())
+    }
+}
+
+/// Internal [`DoctypeTrace`] methods.
+///
+/// It stands to reason that the offsets provided to the `set_` methods can never
+/// be zero. They are nevertheless checked at runtime instead of being converted
+/// with `new_unchecked`, since actually verifying that the offsets provided to
+/// the respective Emitter methods can never be zero would be non-trivial
+/// (the tokenizer state machine has 80 states).
+impl DoctypeTrace {
+    /// Converts `offset` into a `NonZeroUsize`, panicking if it is zero.
+    #[inline]
+    #[track_caller]
+    fn checked_offset(offset: usize) -> NonZeroUsize {
+        NonZeroUsize::new(offset).expect("expected offset to be non-zero")
+    }
+
+    /// Creates a trace whose span starts at `span_start`.
+    ///
+    /// The span end is initialised to 0 and no name or identifier spans are recorded.
+    #[inline]
+    pub(crate) fn new(span_start: usize) -> Self {
+        Self {
+            span: Range {
+                start: span_start,
+                end: 0,
+            },
+            name_span: None,
+            public_id_span: None,
+            system_id_span: None,
+        }
+    }
+
+    /// Records an empty name span positioned at `start`. Panics if `start` is zero.
+    #[inline]
+    pub(crate) fn set_name_start(&mut self, start: usize) {
+        let offset = Self::checked_offset(start);
+        self.name_span = Some(Range {
+            start: offset,
+            end: offset,
+        });
+    }
+
+    /// Records an empty public identifier span positioned at `start`. Panics if `start` is zero.
+    #[inline]
+    pub(crate) fn set_public_id_start(&mut self, start: usize) {
+        let offset = Self::checked_offset(start);
+        self.public_id_span = Some(Range {
+            start: offset,
+            end: offset,
+        });
+    }
+
+    /// Records an empty system identifier span positioned at `start`. Panics if `start` is zero.
+    #[inline]
+    pub(crate) fn set_system_id_start(&mut self, start: usize) {
+        let offset = Self::checked_offset(start);
+        self.system_id_span = Some(Range {
+            start: offset,
+            end: offset,
+        });
+    }
+
+    /// Sets the end of the name span. Panics if `end` is zero.
+    ///
+    /// The span is bound via `assume!`; presumably `set_name_start` must have been
+    /// called beforehand (confirm against `crate::let_else`).
+    #[inline]
+    pub(crate) fn set_name_end(&mut self, end: usize) {
+        assume!(Some(span), &mut self.name_span);
+        span.end = Self::checked_offset(end);
+    }
+
+    /// Sets the end of the public identifier span. Panics if `end` is zero.
+    ///
+    /// The span is bound via `assume!`; presumably `set_public_id_start` must have been
+    /// called beforehand (confirm against `crate::let_else`).
+    #[inline]
+    pub(crate) fn set_public_id_end(&mut self, end: usize) {
+        assume!(Some(span), &mut self.public_id_span);
+        span.end = Self::checked_offset(end);
+    }
+
+    /// Sets the end of the system identifier span. Panics if `end` is zero.
+    ///
+    /// The span is bound via `assume!`; presumably `set_system_id_start` must have been
+    /// called beforehand (confirm against `crate::let_else`).
+    #[inline]
+    pub(crate) fn set_system_id_end(&mut self, end: usize) {
+        assume!(Some(span), &mut self.system_id_span);
+        span.end = Self::checked_offset(end);
+    }
+}
+
+/// The syntax used to write an attribute value.
+///
+/// Attributes using the empty attribute syntax have no value and therefore no
+/// `AttrValueSyntax`; see `AttributeTrace::value_syntax`.
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum AttrValueSyntax {
+ /// An unquoted attribute value, e.g. `id=foo`.
+ Unquoted,
+ /// A single-quoted attribute value, e.g. `id='foo'`.
+ SingleQuoted,
+ /// A double-quoted attribute value, e.g. `id="foo"`.
+ DoubleQuoted,
+}
+
+/// Provides byte offsets and the [`AttrValueSyntax`] for an attribute that was present in the source.
+#[derive(Eq, PartialEq, Debug)]
+pub struct AttributeTrace {
+ /// The syntax of the attribute value; `None` for the empty attribute syntax.
+ pub(crate) value_syntax: Option<AttrValueSyntax>,
+ /// The span of the attribute name.
+ pub(crate) name_span: Range<usize>,
+ /// We intentionally don't use `Option<Range<usize>>` here to spare us a byte (and padding) per attribute.
+ /// For the empty attribute syntax this is just the default range (`0..0`).
+ pub(crate) value_span: Range<usize>,
+}
+
+impl AttributeTrace {
+    /// Creates a trace with no value syntax and default (`0..0`) name and value spans.
+    ///
+    /// [`AttributeTrace`] intentionally doesn't implement Default
+    /// (since it's part of the public API and it wouldn't make sense semantically).
+    pub(crate) fn new() -> Self {
+        Self {
+            value_syntax: None,
+            name_span: Range::default(),
+            value_span: Range::default(),
+        }
+    }
+
+    /// Returns the span of the attribute name.
+    pub fn name_span(&self) -> Range<usize> {
+        self.name_span.clone()
+    }
+
+    /// For explicitly defined values returns the span of the attribute value.
+    ///
+    /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
+    pub fn value_span(&self) -> Option<Range<usize>> {
+        // The stored span is only meaningful when a value syntax was recorded.
+        self.value_syntax.map(|_| self.value_span.clone())
+    }
+
+    /// Returns the attribute value syntax in case the value is explicitly defined.
+    ///
+    /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`).
+    pub fn value_syntax(&self) -> Option<AttrValueSyntax> {
+        self.value_syntax
+    }
+}
+
+/// List of [`AttributeTrace`]s for the attributes that were present in the source.
+///
+/// Traces are looked up by indexing the list with an `AttributeTraceIdx`.
+#[derive(Eq, PartialEq, Debug)]
+pub struct AttributeTraceList {
+ /// We don't use `HashMap<String, AttributeTrace>` since this would require
+ /// the attribute names to be cloned (which would be less efficient).
+ traces: Vec<AttributeTrace>,
+}
+
+/// Looks up the [`AttributeTrace`] identified by an `AttributeTraceIdx`.
+impl Index<AttributeTraceIdx> for AttributeTraceList {
+    type Output = AttributeTrace;
+
+    fn index(&self, index: AttributeTraceIdx) -> &Self::Output {
+        // The index wraps a non-zero, one-based position; the `Vec` is zero-based.
+        let AttributeTraceIdx(one_based) = index;
+        let position = one_based.get() - 1;
+        &self.traces[position]
+    }
+}
+
+impl AttributeTraceList {
+    /// Creates an empty list.
+    pub(crate) fn new() -> Self {
+        Self { traces: Vec::new() }
+    }
+
+    /// Appends `trace` and returns the index that identifies it.
+    ///
+    /// The returned index is the length of the list after the push, i.e. it is one-based.
+    pub(crate) fn insert(&mut self, trace: AttributeTrace) -> AttributeTraceIdx {
+        self.traces.push(trace);
+        // SAFETY: the list holds at least the element pushed above, so its length cannot be zero.
+        let one_based = unsafe { NonZeroUsize::new_unchecked(self.traces.len()) };
+        AttributeTraceIdx(one_based)
+    }
+}