diff options
-rw-r--r-- | CHANGELOG.md | 4 | ||||
-rw-r--r-- | examples/tokenize.rs | 4 | ||||
-rw-r--r-- | integration_tests/tests/test_html5lib.rs | 8 | ||||
-rw-r--r-- | src/lib.rs | 4 | ||||
-rw-r--r-- | src/naive_parser.rs | 8 | ||||
-rw-r--r-- | src/tokenizer.rs | 2 | ||||
-rw-r--r-- | src/tracing_emitter.rs (renamed from src/default_emitter.rs) | 20 | ||||
-rw-r--r-- | tests/test_spans.rs | 2 |
8 files changed, 27 insertions, 25 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f7452f..06831c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,13 +12,15 @@ * Removed the `Error` variant. (Errors now have to be queried separately with - `DefaultEmitter::drain_errors`.) + `TracingEmitter::drain_errors`.) * Replaced the `String` variant with a new `Char` variant. (The tokenizer now emits chars instead of strings.) * Added the `EndOfFile` variant. +* The `DefaultEmitter` has been renamed to `TracingEmitter`. + * The `DefaultEmitter` now emits `Token::EndOfFile` on the end-of-file. (Previously it did not emit any token symbolizing the end-of-file.) diff --git a/examples/tokenize.rs b/examples/tokenize.rs index f8859e4..791db0f 100644 --- a/examples/tokenize.rs +++ b/examples/tokenize.rs @@ -1,13 +1,13 @@ //! Let's you easily try out the tokenizer with e.g. //! printf '<h1>Hello world!</h1>' | cargo run --example=tokenize -use html5tokenizer::{DefaultEmitter, Tokenizer}; +use html5tokenizer::{Tokenizer, TracingEmitter}; use std::io::BufReader; fn main() { let mut tokenizer = Tokenizer::new( BufReader::new(std::io::stdin().lock()), - DefaultEmitter::default(), + TracingEmitter::default(), ); while let Some(token) = tokenizer.next() { for (error, _) in tokenizer.emitter_mut().drain_errors() { diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs index a682cb3..36fb880 100644 --- a/integration_tests/tests/test_html5lib.rs +++ b/integration_tests/tests/test_html5lib.rs @@ -6,7 +6,7 @@ use html5lib_tests::{ use html5tokenizer::{ offset::{Offset, Position}, reader::Reader, - CdataAction, DefaultEmitter, Emitter, Error, Event, InternalState, Token, Tokenizer, + CdataAction, Emitter, Error, Event, InternalState, Token, Tokenizer, TracingEmitter, }; use similar_asserts::assert_eq; @@ -73,7 +73,7 @@ fn run_test(fname: &str, test_i: usize, test: Test) { test_i, &test, state, - Tokenizer::new(&test.input, DefaultEmitter::default()), + Tokenizer::new(&test.input, TracingEmitter::default()), "string", ); @@ -84,7 +84,7 @@ fn run_test(fname: &str, test_i: usize, test: Test) { state, Tokenizer::new( BufReader::new(test.input.as_bytes()), - DefaultEmitter::default(), + TracingEmitter::default(), ), "bufread", ); @@ -181,7 +181,7 @@ trait DrainErrors<O> { fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_>; } -impl<O> DrainErrors<O> for DefaultEmitter<O> { +impl<O> DrainErrors<O> for TracingEmitter<O> { fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_> { Box::new(self.drain_errors()) } @@ -7,13 +7,13 @@ #![doc = concat!("[the LICENSE file]: ", file_url!("LICENSE"))] #![doc = include_str!("../README.md")] -mod default_emitter; mod emitter; mod entities; mod error; mod let_else; mod naive_parser; mod tokenizer; +mod tracing_emitter; /// Types for HTML attributes. pub mod attr { @@ -25,12 +25,12 @@ pub mod offset; pub mod reader; pub mod token; -pub use default_emitter::DefaultEmitter; pub use emitter::Emitter; pub use error::Error; pub use naive_parser::NaiveParser; pub use token::{Comment, Doctype, EndTag, StartTag, Token}; pub use tokenizer::{CdataAction, Event, State, Tokenizer}; +pub use tracing_emitter::TracingEmitter; #[cfg(feature = "integration-tests")] pub use tokenizer::InternalState; diff --git a/src/naive_parser.rs b/src/naive_parser.rs index 4988477..91edbc0 100644 --- a/src/naive_parser.rs +++ b/src/naive_parser.rs @@ -1,7 +1,7 @@ -use crate::default_emitter::DefaultEmitter; use crate::offset::{Offset, Position}; use crate::reader::{IntoReader, Reader}; use crate::tokenizer::CdataAction; +use crate::tracing_emitter::TracingEmitter; use crate::{Emitter, Event, State, Tokenizer}; /// A naive HTML parser (**not** spec-compliant since it doesn't do tree construction). @@ -30,18 +30,18 @@ pub struct NaiveParser<R: Reader, O: Offset, E: Emitter<O>> { tokenizer: Tokenizer<R, O, E>, } -impl<R, O> NaiveParser<R, O, DefaultEmitter<O>> +impl<R, O> NaiveParser<R, O, TracingEmitter<O>> where R: Reader + Position<O>, O: Offset, { /// Constructs a new naive parser. // TODO: add example for NaiveParser::new - pub fn new<'a, IR>(reader: IR) -> NaiveParser<R, O, DefaultEmitter<O>> + pub fn new<'a, IR>(reader: IR) -> NaiveParser<R, O, TracingEmitter<O>> where IR: IntoReader<'a, Reader = R>, { - NaiveParser::new_with_emitter(reader, DefaultEmitter::default()) + NaiveParser::new_with_emitter(reader, TracingEmitter::default()) } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 7c38e49..d0e2eaf 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -15,7 +15,7 @@ pub use machine::State as InternalState; /// Iterating over the tokenizer directly without calling [`Tokenizer::set_state`] /// results in wrong state transitions: /// -/// ``` +/// ```ignore TODO: unignore once the BasicEmitter has been implemented /// # use html5tokenizer::{DefaultEmitter, Event, Tokenizer, Token}; /// let emitter = DefaultEmitter::default(); /// let html = "<script><b>"; diff --git a/src/default_emitter.rs b/src/tracing_emitter.rs index 7b6c51e..408d9b0 100644 --- a/src/default_emitter.rs +++ b/src/tracing_emitter.rs @@ -11,7 +11,7 @@ use crate::Emitter; use crate::Error; /// The default implementation of [`Emitter`], used to produce tokens. -pub struct DefaultEmitter<O = NoopOffset> { +pub struct TracingEmitter<O = NoopOffset> { current_token: Option<Token<O>>, current_attribute_name: String, current_attr_internal: crate::token::AttrInternal<O>, @@ -21,9 +21,9 @@ pub struct DefaultEmitter<O = NoopOffset> { attr_in_end_tag_span: Option<Range<O>>, } -impl<O: Default> Default for DefaultEmitter<O> { +impl<O: Default> Default for TracingEmitter<O> { fn default() -> Self { - DefaultEmitter { + TracingEmitter { current_token: None, current_attribute_name: String::new(), current_attr_internal: Default::default(), @@ -35,14 +35,14 @@ impl<O: Default> Default for DefaultEmitter<O> { } } -impl<O> DefaultEmitter<O> { +impl<O> TracingEmitter<O> { /// Removes all encountered tokenizer errors and returns them as an iterator. pub fn drain_errors(&mut self) -> impl Iterator<Item = (Error, Range<O>)> + '_ { self.errors.drain(0..) } } -impl<O> Iterator for DefaultEmitter<O> { +impl<O> Iterator for TracingEmitter<O> { type Item = Token<O>; fn next(&mut self) -> Option<Self::Item> { @@ -50,7 +50,7 @@ impl<O> Iterator for DefaultEmitter<O> { } } -impl<O: Offset> Emitter<O> for DefaultEmitter<O> { +impl<O: Offset> Emitter<O> for TracingEmitter<O> { fn report_error(&mut self, error: Error, span: Range<O>) { self.errors.push_back((error, span)); } @@ -274,7 +274,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { } } -impl<O> DefaultEmitter<O> { +impl<O> TracingEmitter<O> { fn emit_token(&mut self, token: Token<O>) { self.emitted_tokens.push_front(token); } @@ -309,12 +309,12 @@ impl<O> DefaultEmitter<O> { } } -/// The majority of our testing of the [`DefaultEmitter`] is done against the +/// The majority of our testing of the [`TracingEmitter`] is done against the /// html5lib-tests in the html5lib integration test. This module only tests /// details that aren't present in the html5lib test data. #[cfg(test)] mod tests { - use super::DefaultEmitter; + use super::TracingEmitter; use crate::token::{AttrValueSyntax, Token}; use crate::{Event, Tokenizer}; @@ -322,7 +322,7 @@ mod tests { fn test_attribute_value_syntax() { let mut tokenizer = Tokenizer::new( "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">", - DefaultEmitter::default(), + TracingEmitter::default(), ) .flatten(); let Event::Token(Token::StartTag(tag)) = tokenizer.next().unwrap() else { diff --git a/tests/test_spans.rs b/tests/test_spans.rs index 64cc250..eb93d43 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -20,7 +20,7 @@ use similar_asserts::assert_eq; type Parser = NaiveParser< PosTrackingReader<Box<dyn Reader<Error = Infallible>>>, usize, - html5tokenizer::DefaultEmitter<usize>, + html5tokenizer::TracingEmitter<usize>, >; fn parser<R>(reader: impl IntoReader<'static, Reader = R>) -> Parser |