summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Martin Fischer <martin@push-f.com> 2023-08-18 14:14:52 +0200
committer: Martin Fischer <martin@push-f.com> 2023-08-19 06:41:03 +0200
commit: bfff0560a0e448eef41ac2b4c7e8deb0a77e4167 (patch)
tree: bc9c3e5665aeedd3676bee2ee2a0133bf840bea2
parent: 9892c726fb212a1af36737d5741ff8421ff20829 (diff)
break!: remove StartTag::next_state
You shouldn't have to manually match the tokens yielded by the tokenizer iterator just to handle state transitions correctly. A better NaiveParser API will be introduced.
-rw-r--r-- examples/switch-state.rs 20
-rw-r--r-- src/emitter.rs 18
2 files changed, 0 insertions, 38 deletions
diff --git a/examples/switch-state.rs b/examples/switch-state.rs
deleted file mode 100644
index 9ebc673..0000000
--- a/examples/switch-state.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-//! Let's you easily try out the tokenizer with e.g.
-//! printf '<style><b>Hello world!</b></style>' | cargo run --example=switch-state
-use html5tokenizer::{BufReadReader, Token, Tokenizer};
-use std::io::stdin;
-
-fn main() {
- let stdin = stdin();
- let mut tokenizer = Tokenizer::new(BufReadReader::new(stdin.lock()));
-
- while let Some(token) = tokenizer.next() {
- let token = token.unwrap();
- println!("{:?}", token);
-
- if let Token::StartTag(start_tag) = token {
- // take care of switching parser state for e.g. <script> & <style>
- // this is not strictly spec-compliant but good enough most of the time
- tokenizer.set_state(start_tag.next_state(false));
- }
- }
-}
diff --git a/src/emitter.rs b/src/emitter.rs
index fba1f6a..c09fe1c 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -7,7 +7,6 @@ use std::mem;
use crate::spans::Span;
use crate::Error;
-use crate::State;
/// An emitter is an object providing methods to the tokenizer to produce tokens.
///
@@ -487,23 +486,6 @@ pub struct StartTag<S> {
pub name_span: S,
}
-impl<S> StartTag<S> {
- /// Returns the next tokenizer state according to
- /// [Parsing HTML fragments](https://html.spec.whatwg.org/multipage/parsing.html#concept-frag-parse-context).
- /// If `scripting` is set to true [`State::RawText`] is returned if this is a `<noscript>` tag,
- /// otherwise [`State::Data`] is returned (as with any other regular tag).
- pub fn next_state(&self, scripting: bool) -> State {
- match self.name.as_str() {
- "title" | "textarea" => State::RcData,
- "style" | "xmp" | "iframe" | "noembed" | "noframes" => State::RawText,
- "script" => State::ScriptData,
- "noscript" if scripting => State::RawText,
- "plaintext" => State::PlainText,
- _other => State::Data,
- }
- }
-}
-
/// A HTML attribute value (plus spans).
#[derive(Debug, Default, Eq, PartialEq)]
pub struct Attribute<S> {