diff options
author | Martin Fischer <martin@push-f.com> | 2023-09-22 15:34:46 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-09-28 10:36:08 +0200 |
commit | 30b4adf60b9423968b0c9c6d23363f6d8cd99384 (patch) | |
tree | aafbdf781282b3440d3b217e6a2614e9cd65a03d | |
parent | d46de6ab592e57a31fef13cfc015c4ce818e8f47 (diff) |
break!: remove CdataAction
Which action the tokenizer takes, depending on whether or not an
adjusted current node is present but not in the HTML namespace,
is an implementation detail and shouldn't be exposed in the API.
-rw-r--r-- | CHANGELOG.md | 2 | ||||
-rw-r--r-- | integration_tests/tests/test_html5lib.rs | 5 | ||||
-rw-r--r-- | src/lib.rs | 2 | ||||
-rw-r--r-- | src/naive_parser.rs | 3 | ||||
-rw-r--r-- | src/tokenizer.rs | 26 | ||||
-rw-r--r-- | src/tokenizer/machine.rs | 22 |
6 files changed, 28 insertions, 32 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index de57890..4106edd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,8 @@ * Added an offset parameter to `emit_eof`. +* Removed `CdataAction` and changed `handle_cdata_open` to just take a boolean instead. + * `NaiveParser`: Removed `new_with_spans`. ### 0.5.1 - 2023-09-03 diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs index 3e07531..010fd44 100644 --- a/integration_tests/tests/test_html5lib.rs +++ b/integration_tests/tests/test_html5lib.rs @@ -6,8 +6,7 @@ use html5lib_tests::{ use html5tokenizer::{ offset::{Offset, PosTrackingReader, Position}, reader::Reader, - BasicEmitter, CdataAction, Emitter, Error, Event, InternalState, Token, Tokenizer, - TracingEmitter, + BasicEmitter, Emitter, Error, Event, InternalState, Token, Tokenizer, TracingEmitter, }; use similar_asserts::assert_eq; @@ -151,7 +150,7 @@ fn run_test_inner<R, O, E, T>( while let Some(event) = tokenizer.next() { let token = match event.unwrap() { Event::CdataOpen => { - tokenizer.handle_cdata_open(CdataAction::BogusComment); + tokenizer.handle_cdata_open(false); continue; } Event::Token(token) => token.into(), @@ -31,7 +31,7 @@ pub use emitter::Emitter; pub use error::Error; pub use naive_parser::NaiveParser; pub use token::{Doctype, EndTag, StartTag, Token}; -pub use tokenizer::{CdataAction, Event, State, Tokenizer}; +pub use tokenizer::{Event, State, Tokenizer}; pub use tracing_emitter::TracingEmitter; #[cfg(feature = "integration-tests")] diff --git a/src/naive_parser.rs b/src/naive_parser.rs index 70b6522..b26e25e 100644 --- a/src/naive_parser.rs +++ b/src/naive_parser.rs @@ -1,6 +1,5 @@ use crate::offset::{Offset, Position}; use crate::reader::{IntoReader, Reader}; -use crate::tokenizer::CdataAction; use crate::{BasicEmitter, Emitter, Event, State, Tokenizer}; /// A naive HTML parser (**not** spec-compliant since it doesn't do tree construction). 
@@ -87,7 +86,7 @@ where } Ok(Event::CdataOpen) => { // Naively parse any CDATA sections as bogus comments. - self.tokenizer.handle_cdata_open(CdataAction::BogusComment) + self.tokenizer.handle_cdata_open(false) } } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index b41c208..cd5ae71 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -49,14 +49,20 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> { /// To be called when the tokenizer iterator implementation yields [`Event::CdataOpen`]. /// - /// For spec-compliant parsing *action* must be [`CdataAction::Cdata`], + /// For spec-compliant parsing the supplied boolean must be `true` /// if there is an _adjusted current node_ and it is not an element in - /// the HTML namespace, or [`CdataAction::BogusComment`] otherwise - /// (as per the third condition under [Markup declaration open state]). + /// the HTML namespace, or `false` otherwise (as per the third condition + /// under [Markup declaration open state]). /// /// [Markup declaration open state]: https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state - pub fn handle_cdata_open(&mut self, action: CdataAction) { - machine::handle_cdata_open(&mut self.machine, action); + pub fn handle_cdata_open( + &mut self, + adjusted_current_node_present_and_not_in_html_namespace: bool, + ) { + machine::handle_cdata_open( + &mut self.machine, + adjusted_current_node_present_and_not_in_html_namespace, + ); } /// Returns a mutable reference to the emitter. @@ -65,16 +71,6 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> { } } -/// Used by [`Tokenizer::handle_cdata_open`] to determine how to process `<![CDATA[` -/// -/// (Since as per the spec this depends on the _adjusted current node_). -pub enum CdataAction { - /// Process it as CDATA. - Cdata, - /// Process it as a bogus comment. - BogusComment, -} - /// An event yielded by the [`Iterator`] implementation for the [`Tokenizer`]. 
#[derive(Debug)] pub enum Event<T> { diff --git a/src/tokenizer/machine.rs b/src/tokenizer/machine.rs index d5a1f87..faf1ea8 100644 --- a/src/tokenizer/machine.rs +++ b/src/tokenizer/machine.rs @@ -2,7 +2,6 @@ mod utils; use crate::entities::try_read_character_reference; use crate::offset::{Offset, Position}; -use crate::tokenizer::CdataAction; use crate::trace::AttrValueSyntax; use crate::{reader::Reader, Emitter, Error}; use utils::{ @@ -2041,21 +2040,22 @@ where } #[inline] -pub(super) fn handle_cdata_open<O, R, E>(slf: &mut Machine<R, O, E>, action: CdataAction) -where +pub(super) fn handle_cdata_open<O, R, E>( + slf: &mut Machine<R, O, E>, + adjusted_current_node_present_and_not_in_html_namespace: bool, +) where O: Offset, R: Reader + Position<O>, E: Emitter<O>, { - match action { - CdataAction::Cdata => slf.state = State::CdataSection, - CdataAction::BogusComment => { - slf.emit_error(Error::CdataInHtmlContent); + if adjusted_current_node_present_and_not_in_html_namespace { + slf.state = State::CdataSection; + } else { + slf.emit_error(Error::CdataInHtmlContent); - slf.emitter.init_comment(slf.reader.position()); - slf.emitter.push_comment("[CDATA["); - slf.state = State::BogusComment; - } + slf.emitter.init_comment(slf.reader.position()); + slf.emitter.push_comment("[CDATA["); + slf.state = State::BogusComment; } } |