//! Provides the [`Trace`] type (byte offsets and syntax information about tokens). use std::{ num::NonZeroUsize, ops::{Index, Range}, }; use crate::let_else::assume; use crate::token::AttributeTraceIdx; /// Provides byte offsets and syntax information about a [`Token`]. /// /// [`Token`]: crate::token::Token #[allow(missing_docs)] #[derive(Eq, PartialEq, Debug)] pub enum Trace { Char(Range), StartTag(StartTagTrace), EndTag(EndTagTrace), Comment(CommentTrace), Doctype(DoctypeTrace), EndOfFile(usize), } /// Provides byte offsets and syntax information for a [`StartTag`] token. /// /// [`StartTag`]: crate::token::StartTag #[derive(Eq, PartialEq, Debug)] pub struct StartTagTrace { /// The span of the tag. pub span: Range, /// The span of the tag name. pub name_span: Range, /// List of [`AttributeTrace`]s for the attributes that were present in the source. pub attribute_traces: AttributeTraceList, } /// Provides byte offsets for an [`EndTag`] token. /// /// [`EndTag`]: crate::token::EndTag #[derive(Eq, PartialEq, Debug)] pub struct EndTagTrace { /// The span of the tag. pub span: Range, /// The span of the tag name. pub name_span: Range, } /// Provides byte offsets for a [`Token::Comment`]. /// /// [`Token::Comment`]: crate::token::Token::Comment #[derive(Eq, PartialEq, Debug)] pub struct CommentTrace { /// The offset of the comment data. pub data_span: Range, } /// Provides byte offsets for a [`Doctype`] token. /// /// [`Doctype`]: crate::token::Doctype #[derive(Eq, PartialEq, Debug)] pub struct DoctypeTrace { pub(crate) span: Range, // Using NonZeroUsize to optimize the size of the struct. name_span: Option>, public_id_span: Option>, system_id_span: Option>, } impl DoctypeTrace { /// Returns the span of the DOCTYPE. pub fn span(&self) -> Range { self.span.clone() } /// Returns the span of the name. pub fn name_span(&self) -> Option> { self.name_span .as_ref() .map(|range| range.start.get()..range.end.get()) } /// Returns the span of the public identifier. pub fn public_id_span(&self) -> Option> { self.public_id_span .as_ref() .map(|range| range.start.get()..range.end.get()) } /// Returns the span of the system identifier. pub fn system_id_span(&self) -> Option> { self.system_id_span .as_ref() .map(|range| range.start.get()..range.end.get()) } } /// Internal [`DoctypeTrace`] methods. /// /// Note that even though it stands to reason that the offsets provided to the `set_` /// methods can never be zero, we intentionally don't use `new_unchecked` since /// actually verifying that the offsets provided to the respective Emitter methods can /// never be zero would non-trivial (since the tokenizer state machine has 80 states). impl DoctypeTrace { #[inline] pub(crate) fn new(span_start: usize) -> Self { Self { span: span_start..0, name_span: None, public_id_span: None, system_id_span: None, } } #[inline] pub(crate) fn set_name_start(&mut self, start: usize) { let start = NonZeroUsize::new(start).expect("expected offset to be non-zero"); self.name_span = Some(start..start); } #[inline] pub(crate) fn set_public_id_start(&mut self, start: usize) { let start = NonZeroUsize::new(start).expect("expected offset to be non-zero"); self.public_id_span = Some(start..start); } #[inline] pub(crate) fn set_system_id_start(&mut self, start: usize) { let start = NonZeroUsize::new(start).expect("expected offset to be non-zero"); self.system_id_span = Some(start..start); } #[inline] pub(crate) fn set_name_end(&mut self, end: usize) { assume!(Some(span), &mut self.name_span); span.end = NonZeroUsize::new(end).expect("expected offset to be non-zero"); } #[inline] pub(crate) fn set_public_id_end(&mut self, end: usize) { assume!(Some(span), &mut self.public_id_span); span.end = NonZeroUsize::new(end).expect("expected offset to be non-zero"); } #[inline] pub(crate) fn set_system_id_end(&mut self, end: usize) { assume!(Some(span), &mut self.system_id_span); span.end = NonZeroUsize::new(end).expect("expected offset to be non-zero"); } } /// The syntax of the attribute value. #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum AttrValueSyntax { /// An unquoted attribute value, e.g. `id=foo`. Unquoted, /// A single-quoted attribute value, e.g. `id='foo'`. SingleQuoted, /// A double-quoted attribute value, e.g. `id="foo"`. DoubleQuoted, } /// Provides byte offsets and the [`AttrValueSyntax`] for an attribute that was present in the source. #[derive(Eq, PartialEq, Debug)] pub struct AttributeTrace { pub(crate) value_syntax: Option, pub(crate) name_span: Range, /// We intentionally don't use `Option>` here to spare us a byte (and padding) per attribute. /// For the empty attribute syntax this is just `O::default()..O::default()`. pub(crate) value_span: Range, } impl AttributeTrace { /// [`AttributeTrace`] intentionally doesn't implement Default /// (since it's part of the public API and it wouldn't make sense semantically). pub(crate) fn new() -> Self { Self { value_syntax: None, name_span: Default::default(), value_span: Default::default(), } } /// Returns the span of the attribute name. pub fn name_span(&self) -> Range { self.name_span.clone() } /// For explicitly defined values returns the span of the attribute value. /// /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in ``). pub fn value_span(&self) -> Option> { if self.value_syntax.is_none() { return None; } Some(self.value_span.clone()) } /// Returns the attribute value syntax in case the value is explicitly defined. /// /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in ``). pub fn value_syntax(&self) -> Option { self.value_syntax } } /// List of [`AttributeTrace`]s for the attributes that were present in the source. #[derive(Eq, PartialEq, Debug)] pub struct AttributeTraceList { /// We don't use `HashMap` since this would require /// the attribute names to be cloned (which would be less efficient). traces: Vec, } impl Index for AttributeTraceList { type Output = AttributeTrace; fn index(&self, index: AttributeTraceIdx) -> &Self::Output { &self.traces[index.0.get() - 1] } } impl AttributeTraceList { pub(crate) fn new() -> Self { Self { traces: Default::default(), } } pub(crate) fn insert(&mut self, trace: AttributeTrace) -> AttributeTraceIdx { self.traces.push(trace); let len = self.traces.len(); AttributeTraceIdx( // SAFETY: len cannot be zero because we push before calling Vec::len. unsafe { std::num::NonZeroUsize::new_unchecked(len) }, ) } }