summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG.md4
-rw-r--r--examples/tokenize.rs4
-rw-r--r--integration_tests/tests/test_html5lib.rs8
-rw-r--r--src/lib.rs4
-rw-r--r--src/naive_parser.rs8
-rw-r--r--src/tokenizer.rs2
-rw-r--r--src/tracing_emitter.rs (renamed from src/default_emitter.rs)20
-rw-r--r--tests/test_spans.rs2
8 files changed, 27 insertions, 25 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8f7452f..06831c3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,13 +12,15 @@
* Removed the `Error` variant.
(Errors now have to be queried separately with
- `DefaultEmitter::drain_errors`.)
+ `TracingEmitter::drain_errors`.)
* Replaced the `String` variant with a new `Char` variant.
(The tokenizer now emits chars instead of strings.)
* Added the `EndOfFile` variant.
+* The `DefaultEmitter` has been renamed to `TracingEmitter`.
+
* The `DefaultEmitter` now emits `Token::EndOfFile` on the end-of-file.
(Previously it did not emit any token symbolizing the end-of-file.)
diff --git a/examples/tokenize.rs b/examples/tokenize.rs
index f8859e4..791db0f 100644
--- a/examples/tokenize.rs
+++ b/examples/tokenize.rs
@@ -1,13 +1,13 @@
//! Lets you easily try out the tokenizer with e.g.
//! printf '<h1>Hello world!</h1>' | cargo run --example=tokenize
-use html5tokenizer::{DefaultEmitter, Tokenizer};
+use html5tokenizer::{Tokenizer, TracingEmitter};
use std::io::BufReader;
fn main() {
let mut tokenizer = Tokenizer::new(
BufReader::new(std::io::stdin().lock()),
- DefaultEmitter::default(),
+ TracingEmitter::default(),
);
while let Some(token) = tokenizer.next() {
for (error, _) in tokenizer.emitter_mut().drain_errors() {
diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs
index a682cb3..36fb880 100644
--- a/integration_tests/tests/test_html5lib.rs
+++ b/integration_tests/tests/test_html5lib.rs
@@ -6,7 +6,7 @@ use html5lib_tests::{
use html5tokenizer::{
offset::{Offset, Position},
reader::Reader,
- CdataAction, DefaultEmitter, Emitter, Error, Event, InternalState, Token, Tokenizer,
+ CdataAction, Emitter, Error, Event, InternalState, Token, Tokenizer, TracingEmitter,
};
use similar_asserts::assert_eq;
@@ -73,7 +73,7 @@ fn run_test(fname: &str, test_i: usize, test: Test) {
test_i,
&test,
state,
- Tokenizer::new(&test.input, DefaultEmitter::default()),
+ Tokenizer::new(&test.input, TracingEmitter::default()),
"string",
);
@@ -84,7 +84,7 @@ fn run_test(fname: &str, test_i: usize, test: Test) {
state,
Tokenizer::new(
BufReader::new(test.input.as_bytes()),
- DefaultEmitter::default(),
+ TracingEmitter::default(),
),
"bufread",
);
@@ -181,7 +181,7 @@ trait DrainErrors<O> {
fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_>;
}
-impl<O> DrainErrors<O> for DefaultEmitter<O> {
+impl<O> DrainErrors<O> for TracingEmitter<O> {
fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_> {
Box::new(self.drain_errors())
}
diff --git a/src/lib.rs b/src/lib.rs
index 40b691a..aecbef3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,13 +7,13 @@
#![doc = concat!("[the LICENSE file]: ", file_url!("LICENSE"))]
#![doc = include_str!("../README.md")]
-mod default_emitter;
mod emitter;
mod entities;
mod error;
mod let_else;
mod naive_parser;
mod tokenizer;
+mod tracing_emitter;
/// Types for HTML attributes.
pub mod attr {
@@ -25,12 +25,12 @@ pub mod offset;
pub mod reader;
pub mod token;
-pub use default_emitter::DefaultEmitter;
pub use emitter::Emitter;
pub use error::Error;
pub use naive_parser::NaiveParser;
pub use token::{Comment, Doctype, EndTag, StartTag, Token};
pub use tokenizer::{CdataAction, Event, State, Tokenizer};
+pub use tracing_emitter::TracingEmitter;
#[cfg(feature = "integration-tests")]
pub use tokenizer::InternalState;
diff --git a/src/naive_parser.rs b/src/naive_parser.rs
index 4988477..91edbc0 100644
--- a/src/naive_parser.rs
+++ b/src/naive_parser.rs
@@ -1,7 +1,7 @@
-use crate::default_emitter::DefaultEmitter;
use crate::offset::{Offset, Position};
use crate::reader::{IntoReader, Reader};
use crate::tokenizer::CdataAction;
+use crate::tracing_emitter::TracingEmitter;
use crate::{Emitter, Event, State, Tokenizer};
/// A naive HTML parser (**not** spec-compliant since it doesn't do tree construction).
@@ -30,18 +30,18 @@ pub struct NaiveParser<R: Reader, O: Offset, E: Emitter<O>> {
tokenizer: Tokenizer<R, O, E>,
}
-impl<R, O> NaiveParser<R, O, DefaultEmitter<O>>
+impl<R, O> NaiveParser<R, O, TracingEmitter<O>>
where
R: Reader + Position<O>,
O: Offset,
{
/// Constructs a new naive parser.
// TODO: add example for NaiveParser::new
- pub fn new<'a, IR>(reader: IR) -> NaiveParser<R, O, DefaultEmitter<O>>
+ pub fn new<'a, IR>(reader: IR) -> NaiveParser<R, O, TracingEmitter<O>>
where
IR: IntoReader<'a, Reader = R>,
{
- NaiveParser::new_with_emitter(reader, DefaultEmitter::default())
+ NaiveParser::new_with_emitter(reader, TracingEmitter::default())
}
}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 7c38e49..d0e2eaf 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -15,7 +15,7 @@ pub use machine::State as InternalState;
/// Iterating over the tokenizer directly without calling [`Tokenizer::set_state`]
/// results in wrong state transitions:
///
-/// ```
+/// ```ignore TODO: unignore once the BasicEmitter has been implemented
/// # use html5tokenizer::{DefaultEmitter, Event, Tokenizer, Token};
/// let emitter = DefaultEmitter::default();
/// let html = "<script><b>";
diff --git a/src/default_emitter.rs b/src/tracing_emitter.rs
index 7b6c51e..408d9b0 100644
--- a/src/default_emitter.rs
+++ b/src/tracing_emitter.rs
@@ -11,7 +11,7 @@ use crate::Emitter;
use crate::Error;
/// The default implementation of [`Emitter`], used to produce tokens.
-pub struct DefaultEmitter<O = NoopOffset> {
+pub struct TracingEmitter<O = NoopOffset> {
current_token: Option<Token<O>>,
current_attribute_name: String,
current_attr_internal: crate::token::AttrInternal<O>,
@@ -21,9 +21,9 @@ pub struct DefaultEmitter<O = NoopOffset> {
attr_in_end_tag_span: Option<Range<O>>,
}
-impl<O: Default> Default for DefaultEmitter<O> {
+impl<O: Default> Default for TracingEmitter<O> {
fn default() -> Self {
- DefaultEmitter {
+ TracingEmitter {
current_token: None,
current_attribute_name: String::new(),
current_attr_internal: Default::default(),
@@ -35,14 +35,14 @@ impl<O: Default> Default for DefaultEmitter<O> {
}
}
-impl<O> DefaultEmitter<O> {
+impl<O> TracingEmitter<O> {
/// Removes all encountered tokenizer errors and returns them as an iterator.
pub fn drain_errors(&mut self) -> impl Iterator<Item = (Error, Range<O>)> + '_ {
self.errors.drain(0..)
}
}
-impl<O> Iterator for DefaultEmitter<O> {
+impl<O> Iterator for TracingEmitter<O> {
type Item = Token<O>;
fn next(&mut self) -> Option<Self::Item> {
@@ -50,7 +50,7 @@ impl<O> Iterator for DefaultEmitter<O> {
}
}
-impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
+impl<O: Offset> Emitter<O> for TracingEmitter<O> {
fn report_error(&mut self, error: Error, span: Range<O>) {
self.errors.push_back((error, span));
}
@@ -274,7 +274,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
}
}
-impl<O> DefaultEmitter<O> {
+impl<O> TracingEmitter<O> {
fn emit_token(&mut self, token: Token<O>) {
self.emitted_tokens.push_front(token);
}
@@ -309,12 +309,12 @@ impl<O> DefaultEmitter<O> {
}
}
-/// The majority of our testing of the [`DefaultEmitter`] is done against the
+/// The majority of our testing of the [`TracingEmitter`] is done against the
/// html5lib-tests in the html5lib integration test. This module only tests
/// details that aren't present in the html5lib test data.
#[cfg(test)]
mod tests {
- use super::DefaultEmitter;
+ use super::TracingEmitter;
use crate::token::{AttrValueSyntax, Token};
use crate::{Event, Tokenizer};
@@ -322,7 +322,7 @@ mod tests {
fn test_attribute_value_syntax() {
let mut tokenizer = Tokenizer::new(
"<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">",
- DefaultEmitter::default(),
+ TracingEmitter::default(),
)
.flatten();
let Event::Token(Token::StartTag(tag)) = tokenizer.next().unwrap() else {
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
index 64cc250..eb93d43 100644
--- a/tests/test_spans.rs
+++ b/tests/test_spans.rs
@@ -20,7 +20,7 @@ use similar_asserts::assert_eq;
type Parser = NaiveParser<
PosTrackingReader<Box<dyn Reader<Error = Infallible>>>,
usize,
- html5tokenizer::DefaultEmitter<usize>,
+ html5tokenizer::TracingEmitter<usize>,
>;
fn parser<R>(reader: impl IntoReader<'static, Reader = R>) -> Parser