diff options
| author | Martin Fischer <martin@push-f.com> | 2023-08-16 07:46:50 +0200 | 
|---|---|---|
| committer | Martin Fischer <martin@push-f.com> | 2023-08-19 13:41:55 +0200 | 
| commit | a48ddc21a26f394e077e7bd80ef96b2c281e7730 (patch) | |
| tree | b49d5c01b0fffeab2f66c00ec33bd11a5962ee25 | |
| parent | 0d96df198b1223e942abfefb7ea51ccab99638cc (diff) | |
feat!: add all-inclusive spans to tags
This is also more performant, since we no longer have to update
the name span on every `Emitter::push_tag_name` call.
| -rw-r--r-- | src/emitter.rs | 57 | ||||
| -rw-r--r-- | src/tokenizer.rs | 2 | ||||
| -rw-r--r-- | tests/test_spans.rs | 34 | 
3 files changed, 66 insertions, 27 deletions
| diff --git a/src/emitter.rs b/src/emitter.rs index 69e9d45..fcfb251 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -64,7 +64,7 @@ pub trait Emitter<O> {      /// error should be emitted.      ///      /// If the current token is not a start/end tag, this method may panic. -    fn emit_current_tag(&mut self); +    fn emit_current_tag(&mut self, offset: O);      /// Emit the _current token_, assuming it is a comment.      /// @@ -268,7 +268,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {      fn init_start_tag(&mut self, offset: O) {          self.current_token = Some(Token::StartTag(StartTag { -            name_span: offset..offset, +            span: offset - b"<".len()..offset - b"<".len(),              self_closing: false,              name: String::new(),              attributes: Default::default(), @@ -276,7 +276,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {      }      fn init_end_tag(&mut self, offset: O) {          self.current_token = Some(Token::EndTag(EndTag { -            name_span: offset..offset, +            span: offset - b"</".len()..offset - b"</".len(),              name: String::new(),          }));          self.seen_attributes.clear(); @@ -288,18 +288,21 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {              data_offset,          }));      } -    fn emit_current_tag(&mut self) { +    fn emit_current_tag(&mut self, offset: O) {          self.flush_current_attribute(); -        let token = self.current_token.take().unwrap(); -        match token { -            Token::EndTag(_) => { +        let mut token = self.current_token.take().unwrap(); +        match &mut token { +            Token::EndTag(tag) => {                  if !self.seen_attributes.is_empty() {                      let span = self.attr_in_end_tag_span.take().unwrap();                      self.push_error(Error::EndTagWithAttributes, span);                  }                  self.seen_attributes.clear(); +                tag.span.end = offset + 
b">".len(); +            } +            Token::StartTag(tag) => { +                tag.span.end = offset + b">".len();              } -            Token::StartTag(_) => {}              _ => debug_assert!(false),          }          self.emit_token(token); @@ -341,21 +344,11 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {      }      fn push_tag_name(&mut self, s: &str) {          match self.current_token { -            Some(Token::StartTag(StartTag { -                ref mut name, -                ref mut name_span, -                .. -            })) => { +            Some(Token::StartTag(StartTag { ref mut name, .. })) => {                  name.push_str(s); -                name_span.end += s.len();              } -            Some(Token::EndTag(EndTag { -                ref mut name, -                ref mut name_span, -                .. -            })) => { +            Some(Token::EndTag(EndTag { ref mut name, .. })) => {                  name.push_str(s); -                name_span.end += s.len();              }              _ => debug_assert!(false),          } @@ -470,8 +463,16 @@ pub struct StartTag<O> {      /// [`Emitter`] to tweak this behavior.      pub attributes: BTreeMap<String, Attribute<O>>, -    /// The source code span of the tag name. -    pub name_span: Range<O>, +    /// The source code span of the tag. +    pub span: Range<O>, +} + +impl<O: Offset> StartTag<O> { +    /// Calculates the span for the tag name and returns it. +    pub fn name_span(&self) -> Range<O> { +        let start = self.span.start + b"<".len(); +        start..start + self.name.len() +    }  }  /// A HTML attribute value (plus spans). @@ -493,8 +494,16 @@ pub struct EndTag<O> {      /// The ending tag's name, such as `"p"` or `"a"`.      pub name: String, -    /// The source code span of the tag name. -    pub name_span: Range<O>, +    /// The source code span of the tag. 
+    pub span: Range<O>, +} + +impl<O: Offset> EndTag<O> { +    /// Calculates the span for the tag name and returns it. +    pub fn name_span(&self) -> Range<O> { +        let start = self.span.start + b"</".len(); +        start..start + self.name.len() +    }  }  /// An HTML comment. diff --git a/src/tokenizer.rs b/src/tokenizer.rs index e8a8908..1b80ec3 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -160,7 +160,7 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {      #[inline]      pub(crate) fn emit_current_tag(&mut self) { -        self.emitter.emit_current_tag(); +        self.emitter.emit_current_tag(self.reader.position() - 1);          if self.is_start_tag {              std::mem::swap(&mut self.last_start_tag_name, &mut self.current_tag_name);          } diff --git a/tests/test_spans.rs b/tests/test_spans.rs index a33c2b3..33f5d11 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -44,12 +44,42 @@ fn annotate(html: &str, labels: Vec<(Range<usize>, impl AsRef<str>)>) -> String  }  #[test] +fn start_tag_span() { +    let html = "<x> <xyz> <xyz  > <xyz/>"; +    let mut labels = Vec::new(); +    for token in tokenizer(html) { +        if let Token::StartTag(tag) = token { +            labels.push((tag.span, "")); +        } +    } +    assert_snapshot!(annotate(html, labels), @r###" +    <x> <xyz> <xyz  > <xyz/> +    ^^^ ^^^^^ ^^^^^^^ ^^^^^^ +    "###); +} + +#[test] +fn end_tag_span() { +    let html = "</x> </xyz> </xyz  > </xyz/>"; +    let mut labels = Vec::new(); +    for token in tokenizer(html) { +        if let Token::EndTag(tag) = token { +            labels.push((tag.span, "")); +        } +    } +    assert_snapshot!(annotate(html, labels), @r###" +    </x> </xyz> </xyz  > </xyz/> +    ^^^^ ^^^^^^ ^^^^^^^^ ^^^^^^^ +    "###); +} + +#[test]  fn start_tag_name_span() {      let html = "<x> <xyz> <xyz  > <xyz/>";      let mut labels = Vec::new();      for token in tokenizer(html) {          if let 
Token::StartTag(tag) = token { -            labels.push((tag.name_span, "")); +            labels.push((tag.name_span(), ""));          }      }      assert_snapshot!(annotate(html, labels), @r###" @@ -64,7 +94,7 @@ fn end_tag_name_span() {      let mut labels = Vec::new();      for token in tokenizer(html) {          if let Token::EndTag(tag) = token { -            labels.push((tag.name_span, "")); +            labels.push((tag.name_span(), ""));          }      }      assert_snapshot!(annotate(html, labels), @r###" | 
