diff options
author | Martin Fischer <martin@push-f.com> | 2023-09-01 13:27:46 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-09-03 23:00:05 +0200 |
commit | 6556106154d7e7cbc7820f223b9baaf49a900449 (patch) | |
tree | 1778db5ce2665aa49afb68041f53b6ee07149916 | |
parent | 0581e619867bde2374376265b24169b404187529 (diff) |
fix!: make set_self_closing encoding-independent
-rw-r--r-- | CHANGELOG.md | 4 | ||||
-rw-r--r-- | src/emitter.rs | 9 | ||||
-rw-r--r-- | src/machine.rs | 3 | ||||
-rw-r--r-- | tests/test_spans.rs | 19 |
4 files changed, 9 insertions, 26 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 87785c5..b7ae1d7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,12 +20,12 @@
 * Removed `adjusted_current_node_present_and_not_in_html_namespace`.
- * `emit_error` now takes a span instead of an offset.
+ * `emit_error` and `set_self_closing` now take a span instead of an offset.
 * Added a `name_offset` parameter to `init_start_tag` and `init_end_tag`.
 * Several provided offsets have been changed to be more sensible.
- Affected are: `set_self_closing`, `init_start_tag`, `init_end_tag`, `emit_current_tag`
+ Affected are: `init_start_tag`, `init_end_tag`, `emit_current_tag`
 * token types
diff --git a/src/emitter.rs b/src/emitter.rs
index bb12ca4..341d335 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -99,7 +99,7 @@ pub trait Emitter<O> {
     ///
     /// If the current token is an end tag, the emitter should emit the
     /// [`Error::EndTagWithTrailingSolidus`] error.
-    fn set_self_closing(&mut self, slash_offset: O);
+    fn set_self_closing(&mut self, slash_span: Range<O>);
 
     /// Assuming the _current token_ is a doctype, set its "force quirks" flag to true.
     ///
@@ -339,7 +339,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
         self.emit_token(Token::Doctype(doctype));
     }
 
-    fn set_self_closing(&mut self, slash_offset: O) {
+    fn set_self_closing(&mut self, slash_span: Range<O>) {
         let tag = self.current_token.as_mut().unwrap();
         match tag {
             Token::StartTag(StartTag {
@@ -349,10 +349,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
                 *self_closing = true;
             }
             Token::EndTag(_) => {
-                self.emit_error(
-                    Error::EndTagWithTrailingSolidus,
-                    slash_offset..slash_offset + 1,
-                );
+                self.emit_error(Error::EndTagWithTrailingSolidus, slash_span);
             }
             _ => {
                 debug_assert!(false);
diff --git a/src/machine.rs b/src/machine.rs
index 4f2d129..5c5c533 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -929,7 +929,8 @@ where
         State::SelfClosingStartTag => match slf.read_char()? {
             Some('>') => {
                 slf.emitter.set_self_closing(
-                    slf.position_before_match - slf.reader.len_of_char_in_current_encoding('/'),
+                    slf.position_before_match - slf.reader.len_of_char_in_current_encoding('/')
+                        ..slf.position_before_match,
                 );
                 slf.state = State::Data;
                 slf.emit_current_tag();
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
index cb3ee18..74724a5 100644
--- a/tests/test_spans.rs
+++ b/tests/test_spans.rs
@@ -326,8 +326,6 @@ fn annotate_errors(html: &'static str) -> String {
         }
     }
 
-    let doesnt_support_utf16 = std::sync::Mutex::new(false);
-
     let labeler = |tokens| {
         let mut labels = Vec::new();
         for token in tokens {
@@ -336,25 +334,12 @@ fn annotate_errors(html: &'static str) -> String {
             };
 
             labels.push((span, error.code()));
-
-            use html5tokenizer::Error;
-
-            *doesnt_support_utf16.lock().unwrap() = matches!(
-                error,
-                | Error::EndTagWithTrailingSolidus // FIXME
-            );
         }
         labels
     };
 
-    // This will be removed once all tested errors support UTF-16.
-    let _ = labeler(Box::new(tokenizer(html)) as TokenIter);
-    if *doesnt_support_utf16.lock().unwrap() {
-        assert_panics_but_should_not(|| assert_char_encoding_independence(html, labeler));
-    } else {
-        // TODO: Move this assertion into test_and_annotate once all tests support it.
-        assert_char_encoding_independence(html, labeler);
-    }
+    // TODO: Move this assertion into test_and_annotate once all tests support it.
+    assert_char_encoding_independence(html, labeler);
 
     test_and_annotate(html, labeler)
 }
|