about summary refs log tree commit diff
path: root/src/tokenizer.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/tokenizer.rs')
-rw-r--r-- src/tokenizer.rs 27
1 files changed, 16 insertions, 11 deletions
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 7eb33f7..02a4d62 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1,5 +1,7 @@
+use std::marker::PhantomData;
+
use crate::machine;
-use crate::offset::NoopOffset;
+use crate::offset::{NoopOffset, Offset, Position};
use crate::reader::{IntoReader, Reader};
use crate::utils::{
control_pat, noncharacter_pat, surrogate_pat, ControlToken, State as InternalState,
@@ -33,12 +35,13 @@ impl<T: Copy> Stack2<T> {
}
/// A HTML tokenizer. See crate-level docs for basic usage.
-pub struct Tokenizer<R: Reader, E: Emitter<R> = DefaultEmitter<R, NoopOffset>> {
+pub struct Tokenizer<R: Reader, O = NoopOffset, E: Emitter<O> = DefaultEmitter<O>> {
eof: bool,
pub(crate) state: InternalState,
pub(crate) emitter: E,
pub(crate) temporary_buffer: String,
pub(crate) reader: R,
+ _offset: PhantomData<O>,
to_reconsume: Stack2<Option<char>>,
pub(crate) character_reference_code: u32,
pub(crate) return_state: Option<InternalState>,
@@ -47,7 +50,7 @@ pub struct Tokenizer<R: Reader, E: Emitter<R> = DefaultEmitter<R, NoopOffset>> {
is_start_tag: bool,
}
-impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {
+impl<R: Reader, O, E: Emitter<O>> Tokenizer<R, O, E> {
/// Creates a new tokenizer from some input and an emitter.
///
/// TODO: add warning about you needing to do the state switching
@@ -55,6 +58,7 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {
Tokenizer {
reader: reader.into_reader(),
emitter,
+ _offset: PhantomData,
state: InternalState::Data,
to_reconsume: Stack2::default(),
return_state: None,
@@ -102,7 +106,7 @@ impl From<State> for InternalState {
}
}
-impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {
+impl<R: Reader + Position<O>, O, E: Emitter<O>> Tokenizer<R, O, E> {
/// Test-internal function to override internal state.
///
/// Only available with the `integration-tests` feature which is not public API.
@@ -119,7 +123,7 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {
/// Just a helper method for the machine.
#[inline]
pub(crate) fn emit_error(&mut self, error: Error) {
- self.emitter.emit_error(error, &self.reader);
+ self.emitter.emit_error(error, self.reader.position());
}
/// Assuming the _current token_ is an end tag, return true if all of these hold. Return false otherwise.
@@ -136,14 +140,14 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {
#[inline]
pub(crate) fn init_start_tag(&mut self) {
- self.emitter.init_start_tag(&self.reader);
+ self.emitter.init_start_tag(self.reader.position());
self.current_tag_name.clear();
self.is_start_tag = true;
}
#[inline]
pub(crate) fn init_end_tag(&mut self) {
- self.emitter.init_end_tag(&self.reader);
+ self.emitter.init_end_tag(self.reader.position());
self.current_tag_name.clear();
self.is_start_tag = false;
}
@@ -270,10 +274,11 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {
}
}
-impl<R, E> Iterator for Tokenizer<R, E>
+impl<O, R, E> Iterator for Tokenizer<R, O, E>
where
- R: Reader,
- E: Emitter<R>,
+ O: Offset,
+ R: Reader + Position<O>,
+ E: Emitter<O>,
{
type Item = Result<E::Token, R::Error>;
@@ -297,7 +302,7 @@ where
}
}
-impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {
+impl<R: Reader, O, E: Emitter<O>> Tokenizer<R, O, E> {
/// Test-internal function to override internal state.
///
/// Only available with the `integration-tests` feature which is not public API.