diff options
author | Martin Fischer <martin@push-f.com> | 2023-08-18 14:14:52 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-08-19 06:41:03 +0200 |
commit | bfff0560a0e448eef41ac2b4c7e8deb0a77e4167 (patch) | |
tree | bc9c3e5665aeedd3676bee2ee2a0133bf840bea2 | |
parent | 9892c726fb212a1af36737d5741ff8421ff20829 (diff) |
break!: remove StartTag::next_state
You shouldn't have to manually match tokens yielded by the
tokenizer iterator just to handle state transitions correctly.
A better NaiveParser API will be introduced.
-rw-r--r-- | examples/switch-state.rs | 20 | ||||
-rw-r--r-- | src/emitter.rs | 18 |
2 files changed, 0 insertions, 38 deletions
diff --git a/examples/switch-state.rs b/examples/switch-state.rs deleted file mode 100644 index 9ebc673..0000000 --- a/examples/switch-state.rs +++ /dev/null @@ -1,20 +0,0 @@ -//! Let's you easily try out the tokenizer with e.g. -//! printf '<style><b>Hello world!</b></style>' | cargo run --example=switch-state -use html5tokenizer::{BufReadReader, Token, Tokenizer}; -use std::io::stdin; - -fn main() { - let stdin = stdin(); - let mut tokenizer = Tokenizer::new(BufReadReader::new(stdin.lock())); - - while let Some(token) = tokenizer.next() { - let token = token.unwrap(); - println!("{:?}", token); - - if let Token::StartTag(start_tag) = token { - // take care of switching parser state for e.g. <script> & <style> - // this is not strictly spec-compliant but good enough most of the time - tokenizer.set_state(start_tag.next_state(false)); - } - } -} diff --git a/src/emitter.rs b/src/emitter.rs index fba1f6a..c09fe1c 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -7,7 +7,6 @@ use std::mem; use crate::spans::Span; use crate::Error; -use crate::State; /// An emitter is an object providing methods to the tokenizer to produce tokens. /// @@ -487,23 +486,6 @@ pub struct StartTag<S> { pub name_span: S, } -impl<S> StartTag<S> { - /// Returns the next tokenizer state according to - /// [Parsing HTML fragments](https://html.spec.whatwg.org/multipage/parsing.html#concept-frag-parse-context). - /// If `scripting` is set to true [`State::RawText`] is returned if this is a `<noscript>` tag, - /// otherwise [`State::Data`] is returned (as with any other regular tag). - pub fn next_state(&self, scripting: bool) -> State { - match self.name.as_str() { - "title" | "textarea" => State::RcData, - "style" | "xmp" | "iframe" | "noembed" | "noframes" => State::RawText, - "script" => State::ScriptData, - "noscript" if scripting => State::RawText, - "plaintext" => State::PlainText, - _other => State::Data, - } - } -} - /// A HTML attribute value (plus spans). 
#[derive(Debug, Default, Eq, PartialEq)] pub struct Attribute<S> { |