diff options
| -rw-r--r-- | examples/switch-state.rs | 20 | ||||
| -rw-r--r-- | src/emitter.rs | 18 | 
2 files changed, 38 insertions, 0 deletions
diff --git a/examples/switch-state.rs b/examples/switch-state.rs new file mode 100644 index 0000000..e966687 --- /dev/null +++ b/examples/switch-state.rs @@ -0,0 +1,20 @@ +//! Lets you easily try out the tokenizer with e.g. +//! printf '<style><b>Hello world!</b></style>' | cargo run --example=switch-state +use html5gum::{BufReadReader, Token, Tokenizer}; +use std::io::stdin; + +fn main() { +    let stdin = stdin(); +    let mut tokenizer = Tokenizer::new(BufReadReader::new(stdin.lock())); + +    while let Some(token) = tokenizer.next() { +        let token = token.unwrap(); +        println!("{:?}", token); + +        if let Token::StartTag(start_tag) = token { +            // take care of switching tokenizer state for e.g. <script> & <style> (scripting = false); +            // this is not strictly spec-compliant but good enough most of the time +            tokenizer.set_state(start_tag.next_state(false)); +        } +    } +} diff --git a/src/emitter.rs b/src/emitter.rs index 8c8976d..0a80544 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -4,6 +4,7 @@ use std::collections::VecDeque;  use std::mem;  use crate::Error; +use crate::State;  /// An emitter is an object providing methods to the tokenizer to produce tokens.  /// @@ -431,6 +432,23 @@ pub struct StartTag {      pub attributes: BTreeMap<String, String>,  } +impl StartTag { +    /// Returns the next tokenizer state according to +    /// [Parsing HTML fragments](https://html.spec.whatwg.org/multipage/parsing.html#concept-frag-parse-context). +    /// A `<noscript>` tag yields [`State::RawText`] only when `scripting` is true; otherwise it +    /// yields [`State::Data`], as does every tag name that is not matched explicitly. 
+    pub fn next_state(&self, scripting: bool) -> State { +        match self.name.as_str() { +            "title" | "textarea" => State::RcData, +            "style" | "xmp" | "iframe" | "noembed" | "noframes" => State::RawText, +            "script" => State::ScriptData, +            "noscript" if scripting => State::RawText, +            "plaintext" => State::PlainText, +            _other => State::Data, +        } +    } +} +  /// A HTML end/close tag, such as `</p>` or `</a>`.  #[derive(Debug, Default, Eq, PartialEq)]  pub struct EndTag {  | 
