From a48ddc21a26f394e077e7bd80ef96b2c281e7730 Mon Sep 17 00:00:00 2001 From: Martin Fischer Date: Wed, 16 Aug 2023 07:46:50 +0200 Subject: feat!: add all-inclusive spans to tags Also more performant since we no longer have to update the name span on every Emitter::push_tag_name call. --- src/emitter.rs | 57 +++++++++++++++++++++++++++++++---------------------- src/tokenizer.rs | 2 +- tests/test_spans.rs | 34 ++++++++++++++++++++++++++++++-- 3 files changed, 66 insertions(+), 27 deletions(-) diff --git a/src/emitter.rs b/src/emitter.rs index 69e9d45..fcfb251 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -64,7 +64,7 @@ pub trait Emitter { /// error should be emitted. /// /// If the current token is not a start/end tag, this method may panic. - fn emit_current_tag(&mut self); + fn emit_current_tag(&mut self, offset: O); /// Emit the _current token_, assuming it is a comment. /// @@ -268,7 +268,7 @@ impl Emitter for DefaultEmitter { fn init_start_tag(&mut self, offset: O) { self.current_token = Some(Token::StartTag(StartTag { - name_span: offset..offset, + span: offset - b"<".len()..offset - b"<".len(), self_closing: false, name: String::new(), attributes: Default::default(), @@ -276,7 +276,7 @@ impl Emitter for DefaultEmitter { } fn init_end_tag(&mut self, offset: O) { self.current_token = Some(Token::EndTag(EndTag { - name_span: offset..offset, + span: offset - b" Emitter for DefaultEmitter { data_offset, })); } - fn emit_current_tag(&mut self) { + fn emit_current_tag(&mut self, offset: O) { self.flush_current_attribute(); - let token = self.current_token.take().unwrap(); - match token { - Token::EndTag(_) => { + let mut token = self.current_token.take().unwrap(); + match &mut token { + Token::EndTag(tag) => { if !self.seen_attributes.is_empty() { let span = self.attr_in_end_tag_span.take().unwrap(); self.push_error(Error::EndTagWithAttributes, span); } self.seen_attributes.clear(); + tag.span.end = offset + b">".len(); + } + Token::StartTag(tag) => { + tag.span.end = offset + b">".len(); } - Token::StartTag(_) => {} _ => debug_assert!(false), } self.emit_token(token); @@ -341,21 +344,11 @@ impl Emitter for DefaultEmitter { } fn push_tag_name(&mut self, s: &str) { match self.current_token { - Some(Token::StartTag(StartTag { - ref mut name, - ref mut name_span, - .. - })) => { + Some(Token::StartTag(StartTag { ref mut name, .. })) => { name.push_str(s); - name_span.end += s.len(); } - Some(Token::EndTag(EndTag { - ref mut name, - ref mut name_span, - .. - })) => { + Some(Token::EndTag(EndTag { ref mut name, .. })) => { name.push_str(s); - name_span.end += s.len(); } _ => debug_assert!(false), } @@ -470,8 +463,16 @@ pub struct StartTag { /// [`Emitter`] to tweak this behavior. pub attributes: BTreeMap>, - /// The source code span of the tag name. - pub name_span: Range, + /// The source code span of the tag. + pub span: Range, +} + +impl StartTag { + /// Calculates the span for the tag name and returns it. + pub fn name_span(&self) -> Range { + let start = self.span.start + b"<".len(); + start..start + self.name.len() + } } /// A HTML attribute value (plus spans). @@ -493,8 +494,16 @@ pub struct EndTag { /// The ending tag's name, such as `"p"` or `"a"`. pub name: String, - /// The source code span of the tag name. - pub name_span: Range, + /// The source code span of the tag. + pub span: Range, +} + +impl EndTag { + /// Calculates the span for the tag name and returns it. + pub fn name_span(&self) -> Range { + let start = self.span.start + b", O: Offset, E: Emitter> Tokenizer { #[inline] pub(crate) fn emit_current_tag(&mut self) { - self.emitter.emit_current_tag(); + self.emitter.emit_current_tag(self.reader.position() - 1); if self.is_start_tag { std::mem::swap(&mut self.last_start_tag_name, &mut self.current_tag_name); } diff --git a/tests/test_spans.rs b/tests/test_spans.rs index a33c2b3..33f5d11 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -43,13 +43,43 @@ fn annotate(html: &str, labels: Vec<(Range, impl AsRef)>) -> String .join("\n") } +#[test] +fn start_tag_span() { + let html = " "; + let mut labels = Vec::new(); + for token in tokenizer(html) { + if let Token::StartTag(tag) = token { + labels.push((tag.span, "")); + } + } + assert_snapshot!(annotate(html, labels), @r###" + + ^^^ ^^^^^ ^^^^^^^ ^^^^^^ + "###); +} + +#[test] +fn end_tag_span() { + let html = " "; + let mut labels = Vec::new(); + for token in tokenizer(html) { + if let Token::EndTag(tag) = token { + labels.push((tag.span, "")); + } + } + assert_snapshot!(annotate(html, labels), @r###" + + ^^^^ ^^^^^^ ^^^^^^^^ ^^^^^^^ + "###); +} + #[test] fn start_tag_name_span() { let html = " "; let mut labels = Vec::new(); for token in tokenizer(html) { if let Token::StartTag(tag) = token { - labels.push((tag.name_span, "")); + labels.push((tag.name_span(), "")); } } assert_snapshot!(annotate(html, labels), @r###" @@ -64,7 +94,7 @@ fn end_tag_name_span() { let mut labels = Vec::new(); for token in tokenizer(html) { if let Token::EndTag(tag) = token { - labels.push((tag.name_span, "")); + labels.push((tag.name_span(), "")); } } assert_snapshot!(annotate(html, labels), @r###" -- cgit v1.2.3