diff options
Diffstat (limited to 'src/trace.rs')
-rw-r--r-- | src/trace.rs | 241 |
1 files changed, 241 insertions, 0 deletions
diff --git a/src/trace.rs b/src/trace.rs new file mode 100644 index 0000000..a816429 --- /dev/null +++ b/src/trace.rs @@ -0,0 +1,241 @@ +//! Provides the [`Trace`] type (byte offsets and syntax information about tokens). + +use std::{ + num::NonZeroUsize, + ops::{Index, Range}, +}; + +use crate::let_else::assume; +use crate::token::AttributeTraceIdx; + +/// Provides byte offsets and syntax information about a [`Token`]. +/// +/// [`Token`]: crate::token::Token +#[allow(missing_docs)] +#[derive(Eq, PartialEq, Debug)] +pub enum Trace { + Char, + StartTag(StartTagTrace), + EndTag(EndTagTrace), + Comment(CommentTrace), + Doctype(DoctypeTrace), + EndOfFile, +} + +/// Provides byte offsets and syntax information for a [`StartTag`] token. +/// +/// [`StartTag`]: crate::token::StartTag +#[derive(Eq, PartialEq, Debug)] +pub struct StartTagTrace { + /// The span of the tag. + pub span: Range<usize>, + + /// The span of the tag name. + pub name_span: Range<usize>, + + /// List of [`AttributeTrace`]s for the attributes that were present in the source. + pub attribute_traces: AttributeTraceList, +} + +/// Provides byte offsets for an [`EndTag`] token. +/// +/// [`EndTag`]: crate::token::EndTag +#[derive(Eq, PartialEq, Debug)] +pub struct EndTagTrace { + /// The span of the tag. + pub span: Range<usize>, + + /// The span of the tag name. + pub name_span: Range<usize>, +} + +/// Provides byte offsets for a [`Token::Comment`]. +/// +/// [`Token::Comment`]: crate::token::Token::Comment +#[derive(Eq, PartialEq, Debug)] +pub struct CommentTrace { + /// The offset of the comment data. + pub data_span: Range<usize>, +} + +/// Provides byte offsets for a [`Doctype`] token. +/// +/// [`Doctype`]: crate::token::Doctype +#[derive(Eq, PartialEq, Debug)] +pub struct DoctypeTrace { + pub(crate) span: Range<usize>, + // Using NonZeroUsize to optimize the size of the struct. + name_span: Option<Range<std::num::NonZeroUsize>>, + public_id_span: Option<Range<std::num::NonZeroUsize>>, + system_id_span: Option<Range<std::num::NonZeroUsize>>, +} + +impl DoctypeTrace { + /// Returns the span of the DOCTYPE. + pub fn span(&self) -> Range<usize> { + self.span.clone() + } + + /// Returns the span of the name. + pub fn name_span(&self) -> Option<Range<usize>> { + self.name_span + .as_ref() + .map(|range| range.start.get()..range.end.get()) + } + + /// Returns the span of the public identifier. + pub fn public_id_span(&self) -> Option<Range<usize>> { + self.public_id_span + .as_ref() + .map(|range| range.start.get()..range.end.get()) + } + + /// Returns the span of the system identifier. + pub fn system_id_span(&self) -> Option<Range<usize>> { + self.system_id_span + .as_ref() + .map(|range| range.start.get()..range.end.get()) + } +} + +/// Internal [`DoctypeTrace`] methods. +/// +/// Note that even though it stands to reason that the offsets provided to the `set_` +/// methods can never be zero, we intentionally don't use `new_unchecked` since +/// actually verifying that the offsets provided to the respective Emitter methods can +/// never be zero would non-trivial (since the tokenizer state machine has 80 states). +impl DoctypeTrace { + #[inline] + pub(crate) fn new(span_start: usize) -> Self { + Self { + span: span_start..0, + name_span: None, + public_id_span: None, + system_id_span: None, + } + } + + #[inline] + pub(crate) fn set_name_start(&mut self, start: usize) { + let start = NonZeroUsize::new(start).expect("expected offset to be non-zero"); + self.name_span = Some(start..start); + } + + #[inline] + pub(crate) fn set_public_id_start(&mut self, start: usize) { + let start = NonZeroUsize::new(start).expect("expected offset to be non-zero"); + self.public_id_span = Some(start..start); + } + + #[inline] + pub(crate) fn set_system_id_start(&mut self, start: usize) { + let start = NonZeroUsize::new(start).expect("expected offset to be non-zero"); + self.system_id_span = Some(start..start); + } + + #[inline] + pub(crate) fn set_name_end(&mut self, end: usize) { + assume!(Some(span), &mut self.name_span); + span.end = NonZeroUsize::new(end).expect("expected offset to be non-zero"); + } + + #[inline] + pub(crate) fn set_public_id_end(&mut self, end: usize) { + assume!(Some(span), &mut self.public_id_span); + span.end = NonZeroUsize::new(end).expect("expected offset to be non-zero"); + } + + #[inline] + pub(crate) fn set_system_id_end(&mut self, end: usize) { + assume!(Some(span), &mut self.system_id_span); + span.end = NonZeroUsize::new(end).expect("expected offset to be non-zero"); + } +} + +/// The syntax of the attribute value. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum AttrValueSyntax { + /// An unquoted attribute value, e.g. `id=foo`. + Unquoted, + /// A single-quoted attribute value, e.g. `id='foo'`. + SingleQuoted, + /// A double-quoted attribute value, e.g. `id="foo"`. + DoubleQuoted, +} + +/// Provides byte offsets and the [`AttrValueSyntax`] for an attribute that was present in the source. +#[derive(Eq, PartialEq, Debug)] +pub struct AttributeTrace { + pub(crate) value_syntax: Option<AttrValueSyntax>, + pub(crate) name_span: Range<usize>, + /// We intentionally don't use `Option<Range<O>>` here to spare us a byte (and padding) per attribute. + /// For the empty attribute syntax this is just `O::default()..O::default()`. + pub(crate) value_span: Range<usize>, +} + +impl AttributeTrace { + /// [`AttributeTrace`] intentionally doesn't implement Default + /// (since it's part of the public API and it wouldn't make sense semantically). + pub(crate) fn new() -> Self { + Self { + value_syntax: None, + name_span: Default::default(), + value_span: Default::default(), + } + } + + /// Returns the span of the attribute name. + pub fn name_span(&self) -> Range<usize> { + self.name_span.clone() + } + + /// For explicitly defined values returns the span of the attribute value. + /// + /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`). + pub fn value_span(&self) -> Option<Range<usize>> { + if self.value_syntax.is_none() { + return None; + } + Some(self.value_span.clone()) + } + + /// Returns the attribute value syntax in case the value is explicitly defined. + /// + /// Returns `None` for attributes using the empty attribute syntax (e.g. `disabled` in `<input disabled>`). + pub fn value_syntax(&self) -> Option<AttrValueSyntax> { + self.value_syntax + } +} + +/// List of [`AttributeTrace`]s for the attributes that were present in the source. +#[derive(Eq, PartialEq, Debug)] +pub struct AttributeTraceList { + /// We don't use `HashMap<String, AttributeTrace>` since this would require + /// the attribute names to be cloned (which would be less efficient). + traces: Vec<AttributeTrace>, +} + +impl Index<AttributeTraceIdx> for AttributeTraceList { + type Output = AttributeTrace; + + fn index(&self, index: AttributeTraceIdx) -> &Self::Output { + &self.traces[index.0.get() - 1] + } +} + +impl AttributeTraceList { + pub(crate) fn new() -> Self { + Self { + traces: Default::default(), + } + } + + pub(crate) fn insert(&mut self, trace: AttributeTrace) -> AttributeTraceIdx { + self.traces.push(trace); + let len = self.traces.len(); + AttributeTraceIdx( + // SAFETY: len cannot be zero because we push before calling Vec::len. + unsafe { std::num::NonZeroUsize::new_unchecked(len) }, + ) + } +} |