diff options
author | Martin Fischer <martin@push-f.com> | 2023-08-17 09:40:47 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-08-19 13:41:55 +0200 |
commit | c15895d44d17984386d3684e2aa85aca386ba3bf (patch) | |
tree | a7c92e5eff97bd7645c7d309c8bf94ea891459ad /src/tokenizer.rs | |
parent | d5c9a851756b1e84b022c2fbf984137aae68e2c9 (diff) |
refactor!: make Emitter generic over offset instead of reader
Emitters should not have access to the reader at all. Also the
current position of the reader, at the time an Emitter method is
called, very much depends on machine implementation details such
as whether `Tokenizer::unread_char` is used. Having the Emitter
methods take offsets lets the machine take care of providing
the right offsets, as evidenced by the next commit.
Diffstat (limited to 'src/tokenizer.rs')
-rw-r--r-- | src/tokenizer.rs | 27 |
1 files changed, 16 insertions, 11 deletions
diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 7eb33f7..02a4d62 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,5 +1,7 @@ +use std::marker::PhantomData; + use crate::machine; -use crate::offset::NoopOffset; +use crate::offset::{NoopOffset, Offset, Position}; use crate::reader::{IntoReader, Reader}; use crate::utils::{ control_pat, noncharacter_pat, surrogate_pat, ControlToken, State as InternalState, @@ -33,12 +35,13 @@ impl<T: Copy> Stack2<T> { } /// A HTML tokenizer. See crate-level docs for basic usage. -pub struct Tokenizer<R: Reader, E: Emitter<R> = DefaultEmitter<R, NoopOffset>> { +pub struct Tokenizer<R: Reader, O = NoopOffset, E: Emitter<O> = DefaultEmitter<O>> { eof: bool, pub(crate) state: InternalState, pub(crate) emitter: E, pub(crate) temporary_buffer: String, pub(crate) reader: R, + _offset: PhantomData<O>, to_reconsume: Stack2<Option<char>>, pub(crate) character_reference_code: u32, pub(crate) return_state: Option<InternalState>, @@ -47,7 +50,7 @@ pub struct Tokenizer<R: Reader, E: Emitter<R> = DefaultEmitter<R, NoopOffset>> { is_start_tag: bool, } -impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> { +impl<R: Reader, O, E: Emitter<O>> Tokenizer<R, O, E> { /// Creates a new tokenizer from some input and an emitter. /// /// TODO: add warning about you needing to do the state switching @@ -55,6 +58,7 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> { Tokenizer { reader: reader.into_reader(), emitter, + _offset: PhantomData, state: InternalState::Data, to_reconsume: Stack2::default(), return_state: None, @@ -102,7 +106,7 @@ impl From<State> for InternalState { } } -impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> { +impl<R: Reader + Position<O>, O, E: Emitter<O>> Tokenizer<R, O, E> { /// Test-internal function to override internal state. /// /// Only available with the `integration-tests` feature which is not public API. @@ -119,7 +123,7 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> { /// Just a helper method for the machine. 
#[inline] pub(crate) fn emit_error(&mut self, error: Error) { - self.emitter.emit_error(error, &self.reader); + self.emitter.emit_error(error, self.reader.position()); } /// Assuming the _current token_ is an end tag, return true if all of these hold. Return false otherwise. @@ -136,14 +140,14 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> { #[inline] pub(crate) fn init_start_tag(&mut self) { - self.emitter.init_start_tag(&self.reader); + self.emitter.init_start_tag(self.reader.position()); self.current_tag_name.clear(); self.is_start_tag = true; } #[inline] pub(crate) fn init_end_tag(&mut self) { - self.emitter.init_end_tag(&self.reader); + self.emitter.init_end_tag(self.reader.position()); self.current_tag_name.clear(); self.is_start_tag = false; } @@ -270,10 +274,11 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> { } } -impl<R, E> Iterator for Tokenizer<R, E> +impl<O, R, E> Iterator for Tokenizer<R, O, E> where - R: Reader, - E: Emitter<R>, + O: Offset, + R: Reader + Position<O>, + E: Emitter<O>, { type Item = Result<E::Token, R::Error>; @@ -297,7 +302,7 @@ where } } -impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> { +impl<R: Reader, O, E: Emitter<O>> Tokenizer<R, O, E> { /// Test-internal function to override internal state. /// /// Only available with the `integration-tests` feature which is not public API. |