summary refs log tree commit diff
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2023-09-22 15:34:46 +0200
committer Martin Fischer <martin@push-f.com> 2023-09-28 10:36:08 +0200
commit 30b4adf60b9423968b0c9c6d23363f6d8cd99384 (patch)
tree aafbdf781282b3440d3b217e6a2614e9cd65a03d
parent d46de6ab592e57a31fef13cfc015c4ce818e8f47 (diff)
break!: remove CdataAction
Which action the tokenizer takes, depending on whether there is an adjusted current node that is not in the HTML namespace, is an implementation detail and shouldn't be exposed in the API.
-rw-r--r-- CHANGELOG.md 2
-rw-r--r-- integration_tests/tests/test_html5lib.rs 5
-rw-r--r-- src/lib.rs 2
-rw-r--r-- src/naive_parser.rs 3
-rw-r--r-- src/tokenizer.rs 26
-rw-r--r-- src/tokenizer/machine.rs 22
6 files changed, 28 insertions, 32 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index de57890..4106edd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -41,6 +41,8 @@
* Added an offset parameter to `emit_eof`.
+* Removed `CdataAction` and changed `handle_cdata_open` to just take a boolean instead.
+
* `NaiveParser`: Removed `new_with_spans`.
### 0.5.1 - 2023-09-03
diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs
index 3e07531..010fd44 100644
--- a/integration_tests/tests/test_html5lib.rs
+++ b/integration_tests/tests/test_html5lib.rs
@@ -6,8 +6,7 @@ use html5lib_tests::{
use html5tokenizer::{
offset::{Offset, PosTrackingReader, Position},
reader::Reader,
- BasicEmitter, CdataAction, Emitter, Error, Event, InternalState, Token, Tokenizer,
- TracingEmitter,
+ BasicEmitter, Emitter, Error, Event, InternalState, Token, Tokenizer, TracingEmitter,
};
use similar_asserts::assert_eq;
@@ -151,7 +150,7 @@ fn run_test_inner<R, O, E, T>(
while let Some(event) = tokenizer.next() {
let token = match event.unwrap() {
Event::CdataOpen => {
- tokenizer.handle_cdata_open(CdataAction::BogusComment);
+ tokenizer.handle_cdata_open(false);
continue;
}
Event::Token(token) => token.into(),
diff --git a/src/lib.rs b/src/lib.rs
index f4e0369..a76ac39 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -31,7 +31,7 @@ pub use emitter::Emitter;
pub use error::Error;
pub use naive_parser::NaiveParser;
pub use token::{Doctype, EndTag, StartTag, Token};
-pub use tokenizer::{CdataAction, Event, State, Tokenizer};
+pub use tokenizer::{Event, State, Tokenizer};
pub use tracing_emitter::TracingEmitter;
#[cfg(feature = "integration-tests")]
diff --git a/src/naive_parser.rs b/src/naive_parser.rs
index 70b6522..b26e25e 100644
--- a/src/naive_parser.rs
+++ b/src/naive_parser.rs
@@ -1,6 +1,5 @@
use crate::offset::{Offset, Position};
use crate::reader::{IntoReader, Reader};
-use crate::tokenizer::CdataAction;
use crate::{BasicEmitter, Emitter, Event, State, Tokenizer};
/// A naive HTML parser (**not** spec-compliant since it doesn't do tree construction).
@@ -87,7 +86,7 @@ where
}
Ok(Event::CdataOpen) => {
// Naively parse any CDATA sections as bogus comments.
- self.tokenizer.handle_cdata_open(CdataAction::BogusComment)
+ self.tokenizer.handle_cdata_open(false)
}
}
}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index b41c208..cd5ae71 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -49,14 +49,20 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
/// To be called when the tokenizer iterator implementation yields [`Event::CdataOpen`].
///
- /// For spec-compliant parsing *action* must be [`CdataAction::Cdata`],
+ /// For spec-compliant parsing the supplied boolean must be `true`
/// if there is an _adjusted current node_ and it is not an element in
- /// the HTML namespace, or [`CdataAction::BogusComment`] otherwise
- /// (as per the third condition under [Markup declaration open state]).
+ /// the HTML namespace, or `false` otherwise (as per the third condition
+ /// under [Markup declaration open state]).
///
/// [Markup declaration open state]: https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
- pub fn handle_cdata_open(&mut self, action: CdataAction) {
- machine::handle_cdata_open(&mut self.machine, action);
+ pub fn handle_cdata_open(
+ &mut self,
+ adjusted_current_node_present_and_not_in_html_namespace: bool,
+ ) {
+ machine::handle_cdata_open(
+ &mut self.machine,
+ adjusted_current_node_present_and_not_in_html_namespace,
+ );
}
/// Returns a mutable reference to the emitter.
@@ -65,16 +71,6 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
}
}
-/// Used by [`Tokenizer::handle_cdata_open`] to determine how to process `<![CDATA[`
-///
-/// (Since as per the spec this depends on the _adjusted current node_).
-pub enum CdataAction {
- /// Process it as CDATA.
- Cdata,
- /// Process it as a bogus comment.
- BogusComment,
-}
-
/// An event yielded by the [`Iterator`] implementation for the [`Tokenizer`].
#[derive(Debug)]
pub enum Event<T> {
diff --git a/src/tokenizer/machine.rs b/src/tokenizer/machine.rs
index d5a1f87..faf1ea8 100644
--- a/src/tokenizer/machine.rs
+++ b/src/tokenizer/machine.rs
@@ -2,7 +2,6 @@ mod utils;
use crate::entities::try_read_character_reference;
use crate::offset::{Offset, Position};
-use crate::tokenizer::CdataAction;
use crate::trace::AttrValueSyntax;
use crate::{reader::Reader, Emitter, Error};
use utils::{
@@ -2041,21 +2040,22 @@ where
}
#[inline]
-pub(super) fn handle_cdata_open<O, R, E>(slf: &mut Machine<R, O, E>, action: CdataAction)
-where
+pub(super) fn handle_cdata_open<O, R, E>(
+ slf: &mut Machine<R, O, E>,
+ adjusted_current_node_present_and_not_in_html_namespace: bool,
+) where
O: Offset,
R: Reader + Position<O>,
E: Emitter<O>,
{
- match action {
- CdataAction::Cdata => slf.state = State::CdataSection,
- CdataAction::BogusComment => {
- slf.emit_error(Error::CdataInHtmlContent);
+ if adjusted_current_node_present_and_not_in_html_namespace {
+ slf.state = State::CdataSection;
+ } else {
+ slf.emit_error(Error::CdataInHtmlContent);
- slf.emitter.init_comment(slf.reader.position());
- slf.emitter.push_comment("[CDATA[");
- slf.state = State::BogusComment;
- }
+ slf.emitter.init_comment(slf.reader.position());
+ slf.emitter.push_comment("[CDATA[");
+ slf.state = State::BogusComment;
}
}