diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib.rs | 2 | ||||
| -rw-r--r-- | src/tokenizer.rs | 59 | 
2 files changed, 54 insertions, 7 deletions
@@ -19,4 +19,4 @@ pub use emitter::{DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token};
 pub use error::Error;
 pub use never::Never;
 pub use reader::{BufReadReader, Readable, Reader, StringReader};
-pub use tokenizer::{InfallibleTokenizer, Tokenizer};
+pub use tokenizer::{InfallibleTokenizer, State, Tokenizer};
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index b09e030..b5a2edf 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1,3 +1,5 @@
+use std::ops::{Deref, DerefMut};
+
 use crate::machine;
 use crate::utils::{
     control_pat, noncharacter_pat, surrogate_pat, ControlToken, State as InternalState,
@@ -55,6 +57,40 @@ impl<R: Reader> Tokenizer<R> {
     }
 }
 
+/// The states you can set the tokenizer to.
+#[derive(Debug)]
+#[non_exhaustive]
+pub enum State {
+    /// The [Data state](https://html.spec.whatwg.org/#data-state).
+    Data,
+    /// The [PLAINTEXT state](https://html.spec.whatwg.org/#plaintext-state).
+    PlainText,
+    /// The [RCDATA state](https://html.spec.whatwg.org/#rcdata-state).
+    RcData,
+    /// The [RAWTEXT state](https://html.spec.whatwg.org/#rawtext-state).
+    RawText,
+    /// The [Script data state](https://html.spec.whatwg.org/#script-data-state).
+    ScriptData,
+    /// The [Script data escaped state](https://html.spec.whatwg.org/#script-data-escaped-state).
+    ScriptDataEscaped,
+    /// The [Script data double escaped state](https://html.spec.whatwg.org/#script-data-double-escaped-state).
+    ScriptDataDoubleEscaped,
+}
+
+impl From<State> for InternalState {
+    fn from(state: State) -> Self {
+        match state {
+            State::Data => InternalState::Data,
+            State::PlainText => InternalState::PlainText,
+            State::RcData => InternalState::RcData,
+            State::RawText => InternalState::RawText,
+            State::ScriptData => InternalState::ScriptData,
+            State::ScriptDataEscaped => InternalState::ScriptDataEscaped,
+            State::ScriptDataDoubleEscaped => InternalState::ScriptDataDoubleEscaped,
+        }
+    }
+}
+
 impl<R: Reader, E: Emitter> Tokenizer<R, E> {
     /// Construct a new tokenizer from some input and a custom emitter.
     ///
@@ -81,12 +117,9 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
         self.state = state;
     }
 
-    /// Set the statemachine to start/continue in [plaintext
-    /// state](https://html.spec.whatwg.org/#plaintext-state).
-    ///
-    /// This tokenizer never gets into that state naturally.
-    pub fn set_plaintext_state(&mut self) {
-        self.state = InternalState::PlainText;
+    /// Set the statemachine to start/continue in the given state.
+    pub fn set_state(&mut self, state: State) {
+        self.state = state.into();
     }
 
     /// Test-internal function to override internal state.
@@ -254,3 +287,17 @@ impl<R: Reader<Error = Never>, E: Emitter> Iterator for InfallibleTokenizer<R, E
         }
     }
 }
+
+impl<R: Reader<Error = Never>, E: Emitter> Deref for InfallibleTokenizer<R, E> {
+    type Target = Tokenizer<R, E>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl<R: Reader<Error = Never>, E: Emitter> DerefMut for InfallibleTokenizer<R, E> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
