summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG.md 2
-rw-r--r-- integration_tests/tests/test_html5lib.rs 5
-rw-r--r-- src/lib.rs 2
-rw-r--r-- src/naive_parser.rs 3
-rw-r--r-- src/tokenizer.rs 26
-rw-r--r-- src/tokenizer/machine.rs 22
6 files changed, 28 insertions, 32 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index de57890..4106edd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -41,6 +41,8 @@
* Added an offset parameter to `emit_eof`.
+* Removed `CdataAction` and changed `handle_cdata_open` to just take a boolean instead.
+
* `NaiveParser`: Removed `new_with_spans`.
### 0.5.1 - 2023-09-03
diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs
index 3e07531..010fd44 100644
--- a/integration_tests/tests/test_html5lib.rs
+++ b/integration_tests/tests/test_html5lib.rs
@@ -6,8 +6,7 @@ use html5lib_tests::{
use html5tokenizer::{
offset::{Offset, PosTrackingReader, Position},
reader::Reader,
- BasicEmitter, CdataAction, Emitter, Error, Event, InternalState, Token, Tokenizer,
- TracingEmitter,
+ BasicEmitter, Emitter, Error, Event, InternalState, Token, Tokenizer, TracingEmitter,
};
use similar_asserts::assert_eq;
@@ -151,7 +150,7 @@ fn run_test_inner<R, O, E, T>(
while let Some(event) = tokenizer.next() {
let token = match event.unwrap() {
Event::CdataOpen => {
- tokenizer.handle_cdata_open(CdataAction::BogusComment);
+ tokenizer.handle_cdata_open(false);
continue;
}
Event::Token(token) => token.into(),
diff --git a/src/lib.rs b/src/lib.rs
index f4e0369..a76ac39 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -31,7 +31,7 @@ pub use emitter::Emitter;
pub use error::Error;
pub use naive_parser::NaiveParser;
pub use token::{Doctype, EndTag, StartTag, Token};
-pub use tokenizer::{CdataAction, Event, State, Tokenizer};
+pub use tokenizer::{Event, State, Tokenizer};
pub use tracing_emitter::TracingEmitter;
#[cfg(feature = "integration-tests")]
diff --git a/src/naive_parser.rs b/src/naive_parser.rs
index 70b6522..b26e25e 100644
--- a/src/naive_parser.rs
+++ b/src/naive_parser.rs
@@ -1,6 +1,5 @@
use crate::offset::{Offset, Position};
use crate::reader::{IntoReader, Reader};
-use crate::tokenizer::CdataAction;
use crate::{BasicEmitter, Emitter, Event, State, Tokenizer};
/// A naive HTML parser (**not** spec-compliant since it doesn't do tree construction).
@@ -87,7 +86,7 @@ where
}
Ok(Event::CdataOpen) => {
// Naively parse any CDATA sections as bogus comments.
- self.tokenizer.handle_cdata_open(CdataAction::BogusComment)
+ self.tokenizer.handle_cdata_open(false)
}
}
}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index b41c208..cd5ae71 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -49,14 +49,20 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
/// To be called when the tokenizer iterator implementation yields [`Event::CdataOpen`].
///
- /// For spec-compliant parsing *action* must be [`CdataAction::Cdata`],
+ /// For spec-compliant parsing the supplied boolean must be `true`
/// if there is an _adjusted current node_ and it is not an element in
- /// the HTML namespace, or [`CdataAction::BogusComment`] otherwise
- /// (as per the third condition under [Markup declaration open state]).
+ /// the HTML namespace, or `false` otherwise (as per the third condition
+ /// under [Markup declaration open state]).
///
/// [Markup declaration open state]: https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
- pub fn handle_cdata_open(&mut self, action: CdataAction) {
- machine::handle_cdata_open(&mut self.machine, action);
+ pub fn handle_cdata_open(
+ &mut self,
+ adjusted_current_node_present_and_not_in_html_namespace: bool,
+ ) {
+ machine::handle_cdata_open(
+ &mut self.machine,
+ adjusted_current_node_present_and_not_in_html_namespace,
+ );
}
/// Returns a mutable reference to the emitter.
@@ -65,16 +71,6 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
}
}
-/// Used by [`Tokenizer::handle_cdata_open`] to determine how to process `<![CDATA[`
-///
-/// (Since as per the spec this depends on the _adjusted current node_).
-pub enum CdataAction {
- /// Process it as CDATA.
- Cdata,
- /// Process it as a bogus comment.
- BogusComment,
-}
-
/// An event yielded by the [`Iterator`] implementation for the [`Tokenizer`].
#[derive(Debug)]
pub enum Event<T> {
diff --git a/src/tokenizer/machine.rs b/src/tokenizer/machine.rs
index d5a1f87..faf1ea8 100644
--- a/src/tokenizer/machine.rs
+++ b/src/tokenizer/machine.rs
@@ -2,7 +2,6 @@ mod utils;
use crate::entities::try_read_character_reference;
use crate::offset::{Offset, Position};
-use crate::tokenizer::CdataAction;
use crate::trace::AttrValueSyntax;
use crate::{reader::Reader, Emitter, Error};
use utils::{
@@ -2041,21 +2040,22 @@ where
}
#[inline]
-pub(super) fn handle_cdata_open<O, R, E>(slf: &mut Machine<R, O, E>, action: CdataAction)
-where
+pub(super) fn handle_cdata_open<O, R, E>(
+ slf: &mut Machine<R, O, E>,
+ adjusted_current_node_present_and_not_in_html_namespace: bool,
+) where
O: Offset,
R: Reader + Position<O>,
E: Emitter<O>,
{
- match action {
- CdataAction::Cdata => slf.state = State::CdataSection,
- CdataAction::BogusComment => {
- slf.emit_error(Error::CdataInHtmlContent);
+ if adjusted_current_node_present_and_not_in_html_namespace {
+ slf.state = State::CdataSection;
+ } else {
+ slf.emit_error(Error::CdataInHtmlContent);
- slf.emitter.init_comment(slf.reader.position());
- slf.emitter.push_comment("[CDATA[");
- slf.state = State::BogusComment;
- }
+ slf.emitter.init_comment(slf.reader.position());
+ slf.emitter.push_comment("[CDATA[");
+ slf.state = State::BogusComment;
}
}