diff options
Diffstat (limited to 'src/tokenizer.rs')
-rw-r--r-- | src/tokenizer.rs | 39 |
1 files changed, 21 insertions, 18 deletions
diff --git a/src/tokenizer.rs b/src/tokenizer.rs index b46bf45..7e05477 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -3,10 +3,13 @@ mod machine; use crate::naive_parser::naive_next_state; use crate::offset::{Offset, Position}; use crate::reader::{IntoReader, Reader}; -use crate::utils::{control_pat, noncharacter_pat, surrogate_pat, State as InternalState}; +use crate::utils::{control_pat, noncharacter_pat, surrogate_pat}; use crate::{Emitter, Error}; use machine::ControlToken; +#[cfg(feature = "integration-tests")] +use crate::utils::State as InternalState; + // this is a stack that can hold 0 to 2 Ts #[derive(Debug, Default, Clone, Copy)] struct Stack2<T: Copy>(Option<(T, Option<T>)>); @@ -55,21 +58,21 @@ impl<T: Copy> Stack2<T> { /// [tree construction]: https://html.spec.whatwg.org/multipage/parsing.html#tree-construction pub struct Tokenizer<R: Reader, O, E: Emitter<O>> { eof: bool, - pub(crate) state: InternalState, + pub(crate) state: machine::State, pub(crate) emitter: E, pub(crate) temporary_buffer: String, pub(crate) reader: R, to_reconsume: Stack2<Option<char>>, pub(crate) character_reference_code: u32, - pub(crate) return_state: Option<InternalState>, + pub(crate) return_state: Option<machine::State>, current_tag_name: String, last_start_tag_name: String, is_start_tag: bool, /// The reader position before the match block in [`machine::consume`]. pub(crate) position_before_match: O, - /// * Set to the offset of `<` in [`InternalState::Data`]. - /// * Set to the offset of `-` in [`InternalState::Comment`]. - /// * Set to the offset of `&` in [`InternalState::CharacterReference`]. + /// * Set to the offset of `<` in [`machine::State::Data`]. + /// * Set to the offset of `-` in [`machine::State::Comment`]. + /// * Set to the offset of `&` in [`machine::State::CharacterReference`]. pub(crate) some_offset: O, /// This boolean flag exists so that the [`NaiveParser`](crate::NaiveParser) can work with any [`Emitter`] /// (it cannot call [`Tokenizer::set_state`] using the emitted start tags since they can be of an arbitrary type). @@ -87,7 +90,7 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> { Tokenizer { reader: reader.into_reader(), emitter, - state: InternalState::Data, + state: machine::State::Data, to_reconsume: Stack2::default(), return_state: None, temporary_buffer: String::new(), @@ -174,16 +177,16 @@ pub enum State { ScriptDataDoubleEscaped, } -impl From<State> for InternalState { +impl From<State> for machine::State { fn from(state: State) -> Self { match state { - State::Data => InternalState::Data, - State::PlainText => InternalState::PlainText, - State::RcData => InternalState::RcData, - State::RawText => InternalState::RawText, - State::ScriptData => InternalState::ScriptData, - State::ScriptDataEscaped => InternalState::ScriptDataEscaped, - State::ScriptDataDoubleEscaped => InternalState::ScriptDataDoubleEscaped, + State::Data => machine::State::Data, + State::PlainText => machine::State::PlainText, + State::RcData => machine::State::RcData, + State::RawText => machine::State::RawText, + State::ScriptData => machine::State::ScriptData, + State::ScriptDataEscaped => machine::State::ScriptDataEscaped, + State::ScriptDataDoubleEscaped => machine::State::ScriptDataDoubleEscaped, } } } @@ -354,9 +357,9 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> { matches!( self.return_state, Some( - InternalState::AttributeValueDoubleQuoted - | InternalState::AttributeValueSingleQuoted - | InternalState::AttributeValueUnquoted + machine::State::AttributeValueDoubleQuoted + | machine::State::AttributeValueSingleQuoted + | machine::State::AttributeValueUnquoted ) ) } |