diff options
author | Martin Fischer <martin@push-f.com> | 2023-09-01 12:53:29 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-09-03 23:00:05 +0200 |
commit | f239037c1b960ba16c6c8b2184ac017c53c631bf (patch) | |
tree | 1b40c7151f5f9270b26ba2a15088f90dca175a43 /src | |
parent | f588704c90f33fe27945d742762d016dea3e113c (diff) |
fix!: make start/end tag name spans encoding-independent
Diffstat (limited to 'src')
-rw-r--r-- | src/emitter.rs | 53 | ||||
-rw-r--r-- | src/machine.rs | 3 | ||||
-rw-r--r-- | src/tokenizer.rs | 6 |
3 files changed, 40 insertions, 22 deletions
diff --git a/src/emitter.rs b/src/emitter.rs index ff6e863..aa84215 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -49,10 +49,10 @@ pub trait Emitter<O> { fn emit_string(&mut self, c: &str); /// Set the _current token_ to a start tag. - fn init_start_tag(&mut self, offset: O); + fn init_start_tag(&mut self, tag_offset: O, name_offset: O); /// Set the _current token_ to an end tag. - fn init_end_tag(&mut self, offset: O); + fn init_end_tag(&mut self, tag_offset: O, name_offset: O); /// Set the _current token_ to a comment. fn init_comment(&mut self, data_offset: O); @@ -78,6 +78,11 @@ pub trait Emitter<O> { /// If the current token is not a doctype, this method may panic. fn emit_current_doctype(&mut self, offset: O); + /// Called after the last [`push_tag_name`] call for a tag name. + /// + /// [`push_tag_name`]: Self::push_tag_name + fn terminate_tag_name(&mut self, offset: O) {} + /// Called after the last [`push_attribute_value`] call for an attribute value. /// /// [`push_attribute_value`]: Self::push_attribute_value @@ -273,18 +278,20 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { self.current_characters.push_str(s); } - fn init_start_tag(&mut self, offset: O) { + fn init_start_tag(&mut self, tag_offset: O, name_offset: O) { self.current_token = Some(Token::StartTag(StartTag { - span: offset..O::default(), + span: tag_offset..O::default(), self_closing: false, name: String::new(), attributes: Default::default(), + name_span: name_offset..O::default(), })); } - fn init_end_tag(&mut self, offset: O) { + fn init_end_tag(&mut self, tag_offset: O, name_offset: O) { self.current_token = Some(Token::EndTag(EndTag { - span: offset..O::default(), + span: tag_offset..O::default(), name: String::new(), + name_span: name_offset..O::default(), })); self.seen_attributes.clear(); } @@ -367,6 +374,22 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { } } + fn terminate_tag_name(&mut self, offset: O) { + match self.current_token { + Some(Token::StartTag(StartTag { + ref mut name_span, .. + })) => { + name_span.end = offset; + } + Some(Token::EndTag(EndTag { + ref mut name_span, .. + })) => { + name_span.end = offset; + } + _ => debug_assert!(false), + } + } + fn push_comment(&mut self, s: &str) { match self.current_token { Some(Token::Comment(Comment { ref mut data, .. })) => data.push_str(s), @@ -483,14 +506,9 @@ pub struct StartTag<O> { /// The source code span of the tag. pub span: Range<O>, -} -impl<O: Offset> StartTag<O> { - /// Calculates the span for the tag name and returns it. - pub fn name_span(&self) -> Range<O> { - let start = self.span.start + b"<".len(); - start..start + self.name.len() - } + /// The span of the tag name. + pub name_span: Range<O>, } /// An HTML end/close tag, such as `</p>` or `</a>`. @@ -502,14 +520,9 @@ pub struct EndTag<O> { /// The source code span of the tag. pub span: Range<O>, -} -impl<O: Offset> EndTag<O> { - /// Calculates the span for the tag name and returns it. - pub fn name_span(&self) -> Range<O> { - let start = self.span.start + b"</".len(); - start..start + self.name.len() - } + /// The span of the tag name. + pub name_span: Range<O>, } /// An HTML comment. diff --git a/src/machine.rs b/src/machine.rs index 5b36eee..c27708d 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -190,14 +190,17 @@ where }, State::TagName => match slf.read_char()? { Some(whitespace_pat!()) => { + slf.emitter.terminate_tag_name(slf.position_before_match); slf.state = State::BeforeAttributeName; Ok(ControlToken::Continue) } Some('/') => { + slf.emitter.terminate_tag_name(slf.position_before_match); slf.state = State::SelfClosingStartTag; Ok(ControlToken::Continue) } Some('>') => { + slf.emitter.terminate_tag_name(slf.position_before_match); slf.state = State::Data; slf.emit_current_tag(); Ok(ControlToken::Continue) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 58f7b80..e0402b9 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -221,14 +221,16 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> { #[inline] pub(crate) fn init_start_tag(&mut self) { - self.emitter.init_start_tag(self.some_offset); + self.emitter + .init_start_tag(self.some_offset, self.position_before_match); self.current_tag_name.clear(); self.is_start_tag = true; } #[inline] pub(crate) fn init_end_tag(&mut self) { - self.emitter.init_end_tag(self.some_offset); + self.emitter + .init_end_tag(self.some_offset, self.position_before_match); self.current_tag_name.clear(); self.is_start_tag = false; } |