summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2023-09-01 12:53:29 +0200
committer Martin Fischer <martin@push-f.com> 2023-09-03 23:00:05 +0200
commit f239037c1b960ba16c6c8b2184ac017c53c631bf (patch)
tree 1b40c7151f5f9270b26ba2a15088f90dca175a43 /src
parent f588704c90f33fe27945d742762d016dea3e113c (diff)
fix!: make start/end tag name spans encoding-independent
Diffstat (limited to 'src')
-rw-r--r-- src/emitter.rs 53
-rw-r--r-- src/machine.rs 3
-rw-r--r-- src/tokenizer.rs 6
3 files changed, 40 insertions, 22 deletions
diff --git a/src/emitter.rs b/src/emitter.rs
index ff6e863..aa84215 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -49,10 +49,10 @@ pub trait Emitter<O> {
fn emit_string(&mut self, c: &str);
/// Set the _current token_ to a start tag.
- fn init_start_tag(&mut self, offset: O);
+ fn init_start_tag(&mut self, tag_offset: O, name_offset: O);
/// Set the _current token_ to an end tag.
- fn init_end_tag(&mut self, offset: O);
+ fn init_end_tag(&mut self, tag_offset: O, name_offset: O);
/// Set the _current token_ to a comment.
fn init_comment(&mut self, data_offset: O);
@@ -78,6 +78,11 @@ pub trait Emitter<O> {
/// If the current token is not a doctype, this method may panic.
fn emit_current_doctype(&mut self, offset: O);
+ /// Called after the last [`push_tag_name`] call for a tag name.
+ ///
+ /// [`push_tag_name`]: Self::push_tag_name
+ fn terminate_tag_name(&mut self, offset: O) {}
+
/// Called after the last [`push_attribute_value`] call for an attribute value.
///
/// [`push_attribute_value`]: Self::push_attribute_value
@@ -273,18 +278,20 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
self.current_characters.push_str(s);
}
- fn init_start_tag(&mut self, offset: O) {
+ fn init_start_tag(&mut self, tag_offset: O, name_offset: O) {
self.current_token = Some(Token::StartTag(StartTag {
- span: offset..O::default(),
+ span: tag_offset..O::default(),
self_closing: false,
name: String::new(),
attributes: Default::default(),
+ name_span: name_offset..O::default(),
}));
}
- fn init_end_tag(&mut self, offset: O) {
+ fn init_end_tag(&mut self, tag_offset: O, name_offset: O) {
self.current_token = Some(Token::EndTag(EndTag {
- span: offset..O::default(),
+ span: tag_offset..O::default(),
name: String::new(),
+ name_span: name_offset..O::default(),
}));
self.seen_attributes.clear();
}
@@ -367,6 +374,22 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
}
}
+ fn terminate_tag_name(&mut self, offset: O) {
+ match self.current_token {
+ Some(Token::StartTag(StartTag {
+ ref mut name_span, ..
+ })) => {
+ name_span.end = offset;
+ }
+ Some(Token::EndTag(EndTag {
+ ref mut name_span, ..
+ })) => {
+ name_span.end = offset;
+ }
+ _ => debug_assert!(false),
+ }
+ }
+
fn push_comment(&mut self, s: &str) {
match self.current_token {
Some(Token::Comment(Comment { ref mut data, .. })) => data.push_str(s),
@@ -483,14 +506,9 @@ pub struct StartTag<O> {
/// The source code span of the tag.
pub span: Range<O>,
-}
-impl<O: Offset> StartTag<O> {
- /// Calculates the span for the tag name and returns it.
- pub fn name_span(&self) -> Range<O> {
- let start = self.span.start + b"<".len();
- start..start + self.name.len()
- }
+ /// The span of the tag name.
+ pub name_span: Range<O>,
}
/// An HTML end/close tag, such as `</p>` or `</a>`.
@@ -502,14 +520,9 @@ pub struct EndTag<O> {
/// The source code span of the tag.
pub span: Range<O>,
-}
-impl<O: Offset> EndTag<O> {
- /// Calculates the span for the tag name and returns it.
- pub fn name_span(&self) -> Range<O> {
- let start = self.span.start + b"</".len();
- start..start + self.name.len()
- }
+ /// The span of the tag name.
+ pub name_span: Range<O>,
}
/// An HTML comment.
diff --git a/src/machine.rs b/src/machine.rs
index 5b36eee..c27708d 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -190,14 +190,17 @@ where
},
State::TagName => match slf.read_char()? {
Some(whitespace_pat!()) => {
+ slf.emitter.terminate_tag_name(slf.position_before_match);
slf.state = State::BeforeAttributeName;
Ok(ControlToken::Continue)
}
Some('/') => {
+ slf.emitter.terminate_tag_name(slf.position_before_match);
slf.state = State::SelfClosingStartTag;
Ok(ControlToken::Continue)
}
Some('>') => {
+ slf.emitter.terminate_tag_name(slf.position_before_match);
slf.state = State::Data;
slf.emit_current_tag();
Ok(ControlToken::Continue)
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 58f7b80..e0402b9 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -221,14 +221,16 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
#[inline]
pub(crate) fn init_start_tag(&mut self) {
- self.emitter.init_start_tag(self.some_offset);
+ self.emitter
+ .init_start_tag(self.some_offset, self.position_before_match);
self.current_tag_name.clear();
self.is_start_tag = true;
}
#[inline]
pub(crate) fn init_end_tag(&mut self) {
- self.emitter.init_end_tag(self.some_offset);
+ self.emitter
+ .init_end_tag(self.some_offset, self.position_before_match);
self.current_tag_name.clear();
self.is_start_tag = false;
}