aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--examples/switch-state.rs20
-rw-r--r--src/emitter.rs18
2 files changed, 38 insertions, 0 deletions
diff --git a/examples/switch-state.rs b/examples/switch-state.rs
new file mode 100644
index 0000000..e966687
--- /dev/null
+++ b/examples/switch-state.rs
@@ -0,0 +1,20 @@
+//! Lets you easily try out the tokenizer with e.g.
+//! printf '<style><b>Hello world!</b></style>' | cargo run --example=switch-state
+use html5gum::{BufReadReader, Token, Tokenizer};
+use std::io::stdin;
+
+fn main() {
+ let stdin = stdin();
+ let mut tokenizer = Tokenizer::new(BufReadReader::new(stdin.lock())); // tokenize whatever is piped in on stdin
+
+ while let Some(token) = tokenizer.next() { // not a `for` loop: the body below calls `tokenizer.set_state`
+ let token = token.unwrap(); // example code: panic instead of handling a failed item
+ println!("{:?}", token);
+
+ if let Token::StartTag(start_tag) = token {
+ // Switch the tokenizer state after start tags such as <script> and <style>, using the state picked by `next_state`.
+ // Passing `false` requests the scripting-disabled mapping. Not strictly spec-compliant, but good enough most of the time.
+ tokenizer.set_state(start_tag.next_state(false));
+ }
+ }
+}
diff --git a/src/emitter.rs b/src/emitter.rs
index 8c8976d..0a80544 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -4,6 +4,7 @@ use std::collections::VecDeque;
use std::mem;
use crate::Error;
+use crate::State;
/// An emitter is an object providing methods to the tokenizer to produce tokens.
///
@@ -431,6 +432,23 @@ pub struct StartTag {
pub attributes: BTreeMap<String, String>,
}
+impl StartTag {
+ /// Returns the tokenizer state to switch to after this start tag, chosen from the tag name according to
+ /// [Parsing HTML fragments](https://html.spec.whatwg.org/multipage/parsing.html#concept-frag-parse-context).
+ /// A `<noscript>` tag yields [`State::RawText`] only when `scripting` is `true`; when it is `false`,
+ /// `<noscript>` yields [`State::Data`], the same state that is returned for any tag not listed in the match.
+ pub fn next_state(&self, scripting: bool) -> State {
+ match self.name.as_str() { // exact string comparison; NOTE(review): assumes `name` is already lowercase, confirm
+ "title" | "textarea" => State::RcData,
+ "style" | "xmp" | "iframe" | "noembed" | "noframes" => State::RawText,
+ "script" => State::ScriptData,
+ "noscript" if scripting => State::RawText, // guard fails when `scripting` is false, so `<noscript>` falls through to the last arm
+ "plaintext" => State::PlainText,
+ _other => State::Data, // every other tag name, plus `<noscript>` with scripting disabled
+ }
+ }
+}
+
/// A HTML end/close tag, such as `</p>` or `</a>`.
#[derive(Debug, Default, Eq, PartialEq)]
pub struct EndTag {