summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2023-08-16 07:46:50 +0200
committer Martin Fischer <martin@push-f.com> 2023-08-19 13:41:55 +0200
commit a48ddc21a26f394e077e7bd80ef96b2c281e7730 (patch)
tree b49d5c01b0fffeab2f66c00ec33bd11a5962ee25
parent 0d96df198b1223e942abfefb7ea51ccab99638cc (diff)
feat!: add all-inclusive spans to tags
This is also more performant, since we no longer have to update the name span on every `Emitter::push_tag_name` call.
-rw-r--r-- src/emitter.rs 57
-rw-r--r-- src/tokenizer.rs 2
-rw-r--r-- tests/test_spans.rs 34
3 files changed, 66 insertions, 27 deletions
diff --git a/src/emitter.rs b/src/emitter.rs
index 69e9d45..fcfb251 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -64,7 +64,7 @@ pub trait Emitter<O> {
/// error should be emitted.
///
/// If the current token is not a start/end tag, this method may panic.
- fn emit_current_tag(&mut self);
+ fn emit_current_tag(&mut self, offset: O);
/// Emit the _current token_, assuming it is a comment.
///
@@ -268,7 +268,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
fn init_start_tag(&mut self, offset: O) {
self.current_token = Some(Token::StartTag(StartTag {
- name_span: offset..offset,
+ span: offset - b"<".len()..offset - b"<".len(),
self_closing: false,
name: String::new(),
attributes: Default::default(),
@@ -276,7 +276,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
}
fn init_end_tag(&mut self, offset: O) {
self.current_token = Some(Token::EndTag(EndTag {
- name_span: offset..offset,
+ span: offset - b"</".len()..offset - b"</".len(),
name: String::new(),
}));
self.seen_attributes.clear();
@@ -288,18 +288,21 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
data_offset,
}));
}
- fn emit_current_tag(&mut self) {
+ fn emit_current_tag(&mut self, offset: O) {
self.flush_current_attribute();
- let token = self.current_token.take().unwrap();
- match token {
- Token::EndTag(_) => {
+ let mut token = self.current_token.take().unwrap();
+ match &mut token {
+ Token::EndTag(tag) => {
if !self.seen_attributes.is_empty() {
let span = self.attr_in_end_tag_span.take().unwrap();
self.push_error(Error::EndTagWithAttributes, span);
}
self.seen_attributes.clear();
+ tag.span.end = offset + b">".len();
+ }
+ Token::StartTag(tag) => {
+ tag.span.end = offset + b">".len();
}
- Token::StartTag(_) => {}
_ => debug_assert!(false),
}
self.emit_token(token);
@@ -341,21 +344,11 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
}
fn push_tag_name(&mut self, s: &str) {
match self.current_token {
- Some(Token::StartTag(StartTag {
- ref mut name,
- ref mut name_span,
- ..
- })) => {
+ Some(Token::StartTag(StartTag { ref mut name, .. })) => {
name.push_str(s);
- name_span.end += s.len();
}
- Some(Token::EndTag(EndTag {
- ref mut name,
- ref mut name_span,
- ..
- })) => {
+ Some(Token::EndTag(EndTag { ref mut name, .. })) => {
name.push_str(s);
- name_span.end += s.len();
}
_ => debug_assert!(false),
}
@@ -470,8 +463,16 @@ pub struct StartTag<O> {
/// [`Emitter`] to tweak this behavior.
pub attributes: BTreeMap<String, Attribute<O>>,
- /// The source code span of the tag name.
- pub name_span: Range<O>,
+ /// The source code span of the tag.
+ pub span: Range<O>,
+}
+
+impl<O: Offset> StartTag<O> {
+ /// Calculates the span for the tag name and returns it.
+ pub fn name_span(&self) -> Range<O> {
+ let start = self.span.start + b"<".len();
+ start..start + self.name.len()
+ }
}
/// A HTML attribute value (plus spans).
@@ -493,8 +494,16 @@ pub struct EndTag<O> {
/// The ending tag's name, such as `"p"` or `"a"`.
pub name: String,
- /// The source code span of the tag name.
- pub name_span: Range<O>,
+ /// The source code span of the tag.
+ pub span: Range<O>,
+}
+
+impl<O: Offset> EndTag<O> {
+ /// Calculates the span for the tag name and returns it.
+ pub fn name_span(&self) -> Range<O> {
+ let start = self.span.start + b"</".len();
+ start..start + self.name.len()
+ }
}
/// An HTML comment.
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index e8a8908..1b80ec3 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -160,7 +160,7 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
#[inline]
pub(crate) fn emit_current_tag(&mut self) {
- self.emitter.emit_current_tag();
+ self.emitter.emit_current_tag(self.reader.position() - 1);
if self.is_start_tag {
std::mem::swap(&mut self.last_start_tag_name, &mut self.current_tag_name);
}
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
index a33c2b3..33f5d11 100644
--- a/tests/test_spans.rs
+++ b/tests/test_spans.rs
@@ -44,12 +44,42 @@ fn annotate(html: &str, labels: Vec<(Range<usize>, impl AsRef<str>)>) -> String
}
#[test]
+fn start_tag_span() {
+ let html = "<x> <xyz> <xyz > <xyz/>";
+ let mut labels = Vec::new();
+ for token in tokenizer(html) {
+ if let Token::StartTag(tag) = token {
+ labels.push((tag.span, ""));
+ }
+ }
+ assert_snapshot!(annotate(html, labels), @r###"
+ <x> <xyz> <xyz > <xyz/>
+ ^^^ ^^^^^ ^^^^^^^ ^^^^^^
+ "###);
+}
+
+#[test]
+fn end_tag_span() {
+ let html = "</x> </xyz> </xyz > </xyz/>";
+ let mut labels = Vec::new();
+ for token in tokenizer(html) {
+ if let Token::EndTag(tag) = token {
+ labels.push((tag.span, ""));
+ }
+ }
+ assert_snapshot!(annotate(html, labels), @r###"
+ </x> </xyz> </xyz > </xyz/>
+ ^^^^ ^^^^^^ ^^^^^^^^ ^^^^^^^
+ "###);
+}
+
+#[test]
fn start_tag_name_span() {
let html = "<x> <xyz> <xyz > <xyz/>";
let mut labels = Vec::new();
for token in tokenizer(html) {
if let Token::StartTag(tag) = token {
- labels.push((tag.name_span, ""));
+ labels.push((tag.name_span(), ""));
}
}
assert_snapshot!(annotate(html, labels), @r###"
@@ -64,7 +94,7 @@ fn end_tag_name_span() {
let mut labels = Vec::new();
for token in tokenizer(html) {
if let Token::EndTag(tag) = token {
- labels.push((tag.name_span, ""));
+ labels.push((tag.name_span(), ""));
}
}
assert_snapshot!(annotate(html, labels), @r###"