diff options
| author | Martin Fischer <martin@push-f.com> | 2023-09-01 12:53:29 +0200 | 
|---|---|---|
| committer | Martin Fischer <martin@push-f.com> | 2023-09-03 23:00:05 +0200 | 
| commit | f239037c1b960ba16c6c8b2184ac017c53c631bf (patch) | |
| tree | 1b40c7151f5f9270b26ba2a15088f90dca175a43 /src | |
| parent | f588704c90f33fe27945d742762d016dea3e113c (diff) | |
fix!: make start/end tag name spans encoding-independent
Diffstat (limited to 'src')
| -rw-r--r-- | src/emitter.rs | 53 | ||||
| -rw-r--r-- | src/machine.rs | 3 | ||||
| -rw-r--r-- | src/tokenizer.rs | 6 | 
3 files changed, 40 insertions, 22 deletions
```diff
diff --git a/src/emitter.rs b/src/emitter.rs
index ff6e863..aa84215 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -49,10 +49,10 @@ pub trait Emitter<O> {
     fn emit_string(&mut self, c: &str);
 
     /// Set the _current token_ to a start tag.
-    fn init_start_tag(&mut self, offset: O);
+    fn init_start_tag(&mut self, tag_offset: O, name_offset: O);
 
     /// Set the _current token_ to an end tag.
-    fn init_end_tag(&mut self, offset: O);
+    fn init_end_tag(&mut self, tag_offset: O, name_offset: O);
 
     /// Set the _current token_ to a comment.
     fn init_comment(&mut self, data_offset: O);
@@ -78,6 +78,11 @@ pub trait Emitter<O> {
     /// If the current token is not a doctype, this method may panic.
     fn emit_current_doctype(&mut self, offset: O);
 
+    /// Called after the last [`push_tag_name`] call for a tag name.
+    ///
+    /// [`push_tag_name`]: Self::push_tag_name
+    fn terminate_tag_name(&mut self, offset: O) {}
+
     /// Called after the last [`push_attribute_value`] call for an attribute value.
     ///
     /// [`push_attribute_value`]: Self::push_attribute_value
@@ -273,18 +278,20 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
         self.current_characters.push_str(s);
     }
 
-    fn init_start_tag(&mut self, offset: O) {
+    fn init_start_tag(&mut self, tag_offset: O, name_offset: O) {
         self.current_token = Some(Token::StartTag(StartTag {
-            span: offset..O::default(),
+            span: tag_offset..O::default(),
             self_closing: false,
             name: String::new(),
             attributes: Default::default(),
+            name_span: name_offset..O::default(),
         }));
     }
-    fn init_end_tag(&mut self, offset: O) {
+    fn init_end_tag(&mut self, tag_offset: O, name_offset: O) {
         self.current_token = Some(Token::EndTag(EndTag {
-            span: offset..O::default(),
+            span: tag_offset..O::default(),
             name: String::new(),
+            name_span: name_offset..O::default(),
         }));
         self.seen_attributes.clear();
     }
@@ -367,6 +374,22 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
         }
     }
 
+    fn terminate_tag_name(&mut self, offset: O) {
+        match self.current_token {
+            Some(Token::StartTag(StartTag {
+                ref mut name_span, ..
+            })) => {
+                name_span.end = offset;
+            }
+            Some(Token::EndTag(EndTag {
+                ref mut name_span, ..
+            })) => {
+                name_span.end = offset;
+            }
+            _ => debug_assert!(false),
+        }
+    }
+
     fn push_comment(&mut self, s: &str) {
         match self.current_token {
             Some(Token::Comment(Comment { ref mut data, .. })) => data.push_str(s),
@@ -483,14 +506,9 @@ pub struct StartTag<O> {
 
     /// The source code span of the tag.
     pub span: Range<O>,
-}
 
-impl<O: Offset> StartTag<O> {
-    /// Calculates the span for the tag name and returns it.
-    pub fn name_span(&self) -> Range<O> {
-        let start = self.span.start + b"<".len();
-        start..start + self.name.len()
-    }
+    /// The span of the tag name.
+    pub name_span: Range<O>,
 }
 
 /// An HTML end/close tag, such as `</p>` or `</a>`.
@@ -502,14 +520,9 @@ pub struct EndTag<O> {
 
     /// The source code span of the tag.
     pub span: Range<O>,
-}
 
-impl<O: Offset> EndTag<O> {
-    /// Calculates the span for the tag name and returns it.
-    pub fn name_span(&self) -> Range<O> {
-        let start = self.span.start + b"</".len();
-        start..start + self.name.len()
-    }
+    /// The span of the tag name.
+    pub name_span: Range<O>,
 }
 
 /// An HTML comment.
diff --git a/src/machine.rs b/src/machine.rs
index 5b36eee..c27708d 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -190,14 +190,17 @@ where
         },
         State::TagName => match slf.read_char()? {
             Some(whitespace_pat!()) => {
+                slf.emitter.terminate_tag_name(slf.position_before_match);
                 slf.state = State::BeforeAttributeName;
                 Ok(ControlToken::Continue)
             }
             Some('/') => {
+                slf.emitter.terminate_tag_name(slf.position_before_match);
                 slf.state = State::SelfClosingStartTag;
                 Ok(ControlToken::Continue)
             }
             Some('>') => {
+                slf.emitter.terminate_tag_name(slf.position_before_match);
                 slf.state = State::Data;
                 slf.emit_current_tag();
                 Ok(ControlToken::Continue)
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 58f7b80..e0402b9 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -221,14 +221,16 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
 
     #[inline]
     pub(crate) fn init_start_tag(&mut self) {
-        self.emitter.init_start_tag(self.some_offset);
+        self.emitter
+            .init_start_tag(self.some_offset, self.position_before_match);
         self.current_tag_name.clear();
         self.is_start_tag = true;
     }
 
     #[inline]
     pub(crate) fn init_end_tag(&mut self) {
-        self.emitter.init_end_tag(self.some_offset);
+        self.emitter
+            .init_end_tag(self.some_offset, self.position_before_match);
         self.current_tag_name.clear();
         self.is_start_tag = false;
     }
```
