diff options
Diffstat (limited to 'src/tokenizer.rs')
-rw-r--r-- | src/tokenizer.rs | 27 |
1 files changed, 16 insertions, 11 deletions
diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 7eb33f7..02a4d62 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,5 +1,7 @@ +use std::marker::PhantomData; + use crate::machine; -use crate::offset::NoopOffset; +use crate::offset::{NoopOffset, Offset, Position}; use crate::reader::{IntoReader, Reader}; use crate::utils::{ control_pat, noncharacter_pat, surrogate_pat, ControlToken, State as InternalState, @@ -33,12 +35,13 @@ impl<T: Copy> Stack2<T> { } /// A HTML tokenizer. See crate-level docs for basic usage. -pub struct Tokenizer<R: Reader, E: Emitter<R> = DefaultEmitter<R, NoopOffset>> { +pub struct Tokenizer<R: Reader, O = NoopOffset, E: Emitter<O> = DefaultEmitter<O>> { eof: bool, pub(crate) state: InternalState, pub(crate) emitter: E, pub(crate) temporary_buffer: String, pub(crate) reader: R, + _offset: PhantomData<O>, to_reconsume: Stack2<Option<char>>, pub(crate) character_reference_code: u32, pub(crate) return_state: Option<InternalState>, @@ -47,7 +50,7 @@ pub struct Tokenizer<R: Reader, E: Emitter<R> = DefaultEmitter<R, NoopOffset>> { is_start_tag: bool, } -impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> { +impl<R: Reader, O, E: Emitter<O>> Tokenizer<R, O, E> { /// Creates a new tokenizer from some input and an emitter. /// /// TODO: add warning about you needing to do the state switching @@ -55,6 +58,7 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> { Tokenizer { reader: reader.into_reader(), emitter, + _offset: PhantomData, state: InternalState::Data, to_reconsume: Stack2::default(), return_state: None, @@ -102,7 +106,7 @@ impl From<State> for InternalState { } } -impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> { +impl<R: Reader + Position<O>, O, E: Emitter<O>> Tokenizer<R, O, E> { /// Test-internal function to override internal state. /// /// Only available with the `integration-tests` feature which is not public API. @@ -119,7 +123,7 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> { /// Just a helper method for the machine. #[inline] pub(crate) fn emit_error(&mut self, error: Error) { - self.emitter.emit_error(error, &self.reader); + self.emitter.emit_error(error, self.reader.position()); } /// Assuming the _current token_ is an end tag, return true if all of these hold. Return false otherwise. @@ -136,14 +140,14 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> { #[inline] pub(crate) fn init_start_tag(&mut self) { - self.emitter.init_start_tag(&self.reader); + self.emitter.init_start_tag(self.reader.position()); self.current_tag_name.clear(); self.is_start_tag = true; } #[inline] pub(crate) fn init_end_tag(&mut self) { - self.emitter.init_end_tag(&self.reader); + self.emitter.init_end_tag(self.reader.position()); self.current_tag_name.clear(); self.is_start_tag = false; } @@ -270,10 +274,11 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> { } } -impl<R, E> Iterator for Tokenizer<R, E> +impl<O, R, E> Iterator for Tokenizer<R, O, E> where - R: Reader, - E: Emitter<R>, + O: Offset, + R: Reader + Position<O>, + E: Emitter<O>, { type Item = Result<E::Token, R::Error>; @@ -297,7 +302,7 @@ where } } -impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> { +impl<R: Reader, O, E: Emitter<O>> Tokenizer<R, O, E> { /// Test-internal function to override internal state. /// /// Only available with the `integration-tests` feature which is not public API. |