aboutsummaryrefslogtreecommitdiff
path: root/src/tokenizer.rs
diff options
context:
space:
mode:
author: Martin Fischer <martin@push-f.com> 2023-08-12 11:06:02 +0200
committer: Martin Fischer <martin@push-f.com> 2023-08-19 06:41:55 +0200
commit: 9f1019afa7a8e9102d67356d85bd632044eb2d0c (patch)
tree: 4c6664aad5a11a942d6684a62e507de28193f5bb /src/tokenizer.rs
parent: c3d60e88efa32329614178dfc9455ef33ea0a88d (diff)
break!: merge Tokenizer::new_with_emitter into Tokenizer::new
The Tokenizer does not perform any state switching, since proper state switching requires a feedback loop between tokenization and DOM tree building. Using the Tokenizer directly is therefore a bit of a pitfall, since you might not expect it to tokenize e.g. `<script><b>` as:

    StartTag(StartTag { name: "script", .. })
    StartTag(StartTag { name: "b", .. })

Since we don't want to make walking into pitfalls particularly easy, this commit changes the Tokenizer::new method so that you have to specify the Emitter. Since this makes new_with_emitter redundant, it is removed.
Diffstat (limited to 'src/tokenizer.rs')
-rw-r--r-- src/tokenizer.rs 36
1 files changed, 15 insertions, 21 deletions
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index d7db3b6..5abd6ba 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -42,10 +42,21 @@ pub struct Tokenizer<R: Reader, E: Emitter<R> = DefaultEmitter<R, ()>> {
pub(crate) return_state: Option<InternalState>,
}
-impl<R: Reader> Tokenizer<R> {
- /// Create a new tokenizer from some input.
- pub fn new<'a, S: IntoReader<'a, Reader = R>>(input: S) -> Self {
- Tokenizer::<S::Reader>::new_with_emitter(input, DefaultEmitter::default())
+impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {
+ /// Creates a new tokenizer from some input and an emitter.
+ ///
+ /// TODO: add warning about you needing to do the state switching
+ pub fn new<'a>(reader: impl IntoReader<'a, Reader = R>, emitter: E) -> Self {
+ Tokenizer {
+ reader: reader.into_reader(),
+ emitter,
+ state: InternalState::Data,
+ to_reconsume: Stack2::default(),
+ return_state: None,
+ temporary_buffer: String::new(),
+ character_reference_code: 0,
+ eof: false,
+ }
}
}
@@ -84,23 +95,6 @@ impl From<State> for InternalState {
}
impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {
- /// Construct a new tokenizer from some input and a custom emitter.
- ///
- /// Use this method over [`Tokenizer::new`] when you want to have more control over string allocation for
- /// tokens.
- pub fn new_with_emitter<'a, S: IntoReader<'a, Reader = R>>(input: S, emitter: E) -> Self {
- Tokenizer {
- eof: false,
- state: InternalState::Data,
- emitter,
- temporary_buffer: String::new(),
- to_reconsume: Stack2::default(),
- reader: input.into_reader(),
- character_reference_code: 0,
- return_state: None,
- }
- }
-
/// Test-internal function to override internal state.
///
/// Only available with the `integration-tests` feature which is not public API.