diff options
-rw-r--r-- | src/lib.rs | 2 | ||||
-rw-r--r-- | src/tokenizer.rs | 59 |
2 files changed, 54 insertions, 7 deletions
@@ -19,4 +19,4 @@ pub use emitter::{DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token};
 pub use error::Error;
 pub use never::Never;
 pub use reader::{BufReadReader, Readable, Reader, StringReader};
-pub use tokenizer::{InfallibleTokenizer, Tokenizer};
+pub use tokenizer::{InfallibleTokenizer, State, Tokenizer};
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index b09e030..b5a2edf 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1,3 +1,5 @@
+use std::ops::{Deref, DerefMut};
+
 use crate::machine;
 use crate::utils::{
     control_pat, noncharacter_pat, surrogate_pat, ControlToken, State as InternalState,
@@ -55,6 +57,40 @@ impl<R: Reader> Tokenizer<R> {
     }
 }
 
+/// The states you can set the tokenizer to.
+#[derive(Debug)]
+#[non_exhaustive]
+pub enum State {
+    /// The [Data state](https://html.spec.whatwg.org/#data-state).
+    Data,
+    /// The [PLAINTEXT state](https://html.spec.whatwg.org/#plaintext-state).
+    PlainText,
+    /// The [RCDATA state](https://html.spec.whatwg.org/#rcdata-state).
+    RcData,
+    /// The [RAWTEXT state](https://html.spec.whatwg.org/#rawtext-state).
+    RawText,
+    /// The [Script data state](https://html.spec.whatwg.org/#script-data-state).
+    ScriptData,
+    /// The [Script data escaped state](https://html.spec.whatwg.org/#script-data-escaped-state).
+    ScriptDataEscaped,
+    /// The [Script data double escaped state](https://html.spec.whatwg.org/#script-data-double-escaped-state).
+    ScriptDataDoubleEscaped,
+}
+
+impl From<State> for InternalState {
+    fn from(state: State) -> Self {
+        match state {
+            State::Data => InternalState::Data,
+            State::PlainText => InternalState::PlainText,
+            State::RcData => InternalState::RcData,
+            State::RawText => InternalState::RawText,
+            State::ScriptData => InternalState::ScriptData,
+            State::ScriptDataEscaped => InternalState::ScriptDataEscaped,
+            State::ScriptDataDoubleEscaped => InternalState::ScriptDataDoubleEscaped,
+        }
+    }
+}
+
 impl<R: Reader, E: Emitter> Tokenizer<R, E> {
     /// Construct a new tokenizer from some input and a custom emitter.
     ///
@@ -81,12 +117,9 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
         self.state = state;
     }
 
-    /// Set the statemachine to start/continue in [plaintext
-    /// state](https://html.spec.whatwg.org/#plaintext-state).
-    ///
-    /// This tokenizer never gets into that state naturally.
-    pub fn set_plaintext_state(&mut self) {
-        self.state = InternalState::PlainText;
+    /// Set the statemachine to start/continue in the given state.
+    pub fn set_state(&mut self, state: State) {
+        self.state = state.into();
     }
 
     /// Test-internal function to override internal state.
@@ -254,3 +287,17 @@ impl<R: Reader<Error = Never>, E: Emitter> Iterator for InfallibleTokenizer<R, E
         }
     }
 }
+
+impl<R: Reader<Error = Never>, E: Emitter> Deref for InfallibleTokenizer<R, E> {
+    type Target = Tokenizer<R, E>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl<R: Reader<Error = Never>, E: Emitter> DerefMut for InfallibleTokenizer<R, E> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}