diff options
| -rw-r--r-- | CHANGELOG.md | 4 | ||||
| -rw-r--r-- | examples/tokenize.rs | 4 | ||||
| -rw-r--r-- | integration_tests/tests/test_html5lib.rs | 8 | ||||
| -rw-r--r-- | src/lib.rs | 4 | ||||
| -rw-r--r-- | src/naive_parser.rs | 8 | ||||
| -rw-r--r-- | src/tokenizer.rs | 2 | ||||
| -rw-r--r-- | src/tracing_emitter.rs (renamed from src/default_emitter.rs) | 20 | ||||
| -rw-r--r-- | tests/test_spans.rs | 2 | 
8 files changed, 27 insertions, 25 deletions
| diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f7452f..06831c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,13 +12,15 @@    * Removed the `Error` variant.        (Errors now have to be queried separately with -    `DefaultEmitter::drain_errors`.) +    `TracingEmitter::drain_errors`.)    * Replaced the `String` variant with a new `Char` variant.        (The tokenizer now emits chars instead of strings.)    * Added the `EndOfFile` variant. +* The `DefaultEmitter` has been renamed to `TracingEmitter`. +  * The `DefaultEmitter` now emits `Token::EndOfFile` on the end-of-file.      (Previously it did not emit any token symbolizing the end-of-file.) diff --git a/examples/tokenize.rs b/examples/tokenize.rs index f8859e4..791db0f 100644 --- a/examples/tokenize.rs +++ b/examples/tokenize.rs @@ -1,13 +1,13 @@  //! Let's you easily try out the tokenizer with e.g.  //! printf '<h1>Hello world!</h1>' | cargo run --example=tokenize -use html5tokenizer::{DefaultEmitter, Tokenizer}; +use html5tokenizer::{Tokenizer, TracingEmitter};  use std::io::BufReader;  fn main() {      let mut tokenizer = Tokenizer::new(          BufReader::new(std::io::stdin().lock()), -        DefaultEmitter::default(), +        TracingEmitter::default(),      );      while let Some(token) = tokenizer.next() {          for (error, _) in tokenizer.emitter_mut().drain_errors() { diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs index a682cb3..36fb880 100644 --- a/integration_tests/tests/test_html5lib.rs +++ b/integration_tests/tests/test_html5lib.rs @@ -6,7 +6,7 @@ use html5lib_tests::{  use html5tokenizer::{      offset::{Offset, Position},      reader::Reader, -    CdataAction, DefaultEmitter, Emitter, Error, Event, InternalState, Token, Tokenizer, +    CdataAction, Emitter, Error, Event, InternalState, Token, Tokenizer, TracingEmitter,  };  use similar_asserts::assert_eq; @@ -73,7 +73,7 @@ fn run_test(fname: &str, test_i: usize, test: Test) {              test_i,              &test,              state, -            Tokenizer::new(&test.input, DefaultEmitter::default()), +            Tokenizer::new(&test.input, TracingEmitter::default()),              "string",          ); @@ -84,7 +84,7 @@ fn run_test(fname: &str, test_i: usize, test: Test) {              state,              Tokenizer::new(                  BufReader::new(test.input.as_bytes()), -                DefaultEmitter::default(), +                TracingEmitter::default(),              ),              "bufread",          ); @@ -181,7 +181,7 @@ trait DrainErrors<O> {      fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_>;  } -impl<O> DrainErrors<O> for DefaultEmitter<O> { +impl<O> DrainErrors<O> for TracingEmitter<O> {      fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_> {          Box::new(self.drain_errors())      } @@ -7,13 +7,13 @@  #![doc = concat!("[the LICENSE file]: ", file_url!("LICENSE"))]  #![doc = include_str!("../README.md")] -mod default_emitter;  mod emitter;  mod entities;  mod error;  mod let_else;  mod naive_parser;  mod tokenizer; +mod tracing_emitter;  /// Types for HTML attributes.  pub mod attr { @@ -25,12 +25,12 @@ pub mod offset;  pub mod reader;  pub mod token; -pub use default_emitter::DefaultEmitter;  pub use emitter::Emitter;  pub use error::Error;  pub use naive_parser::NaiveParser;  pub use token::{Comment, Doctype, EndTag, StartTag, Token};  pub use tokenizer::{CdataAction, Event, State, Tokenizer}; +pub use tracing_emitter::TracingEmitter;  #[cfg(feature = "integration-tests")]  pub use tokenizer::InternalState; diff --git a/src/naive_parser.rs b/src/naive_parser.rs index 4988477..91edbc0 100644 --- a/src/naive_parser.rs +++ b/src/naive_parser.rs @@ -1,7 +1,7 @@ -use crate::default_emitter::DefaultEmitter;  use crate::offset::{Offset, Position};  use crate::reader::{IntoReader, Reader};  use crate::tokenizer::CdataAction; +use crate::tracing_emitter::TracingEmitter;  use crate::{Emitter, Event, State, Tokenizer};  /// A naive HTML parser (**not** spec-compliant since it doesn't do tree construction). @@ -30,18 +30,18 @@ pub struct NaiveParser<R: Reader, O: Offset, E: Emitter<O>> {      tokenizer: Tokenizer<R, O, E>,  } -impl<R, O> NaiveParser<R, O, DefaultEmitter<O>> +impl<R, O> NaiveParser<R, O, TracingEmitter<O>>  where      R: Reader + Position<O>,      O: Offset,  {      /// Constructs a new naive parser.      // TODO: add example for NaiveParser::new -    pub fn new<'a, IR>(reader: IR) -> NaiveParser<R, O, DefaultEmitter<O>> +    pub fn new<'a, IR>(reader: IR) -> NaiveParser<R, O, TracingEmitter<O>>      where          IR: IntoReader<'a, Reader = R>,      { -        NaiveParser::new_with_emitter(reader, DefaultEmitter::default()) +        NaiveParser::new_with_emitter(reader, TracingEmitter::default())      }  } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 7c38e49..d0e2eaf 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -15,7 +15,7 @@ pub use machine::State as InternalState;  /// Iterating over the tokenizer directly without calling [`Tokenizer::set_state`]  /// results in wrong state transitions:  /// -/// ``` +/// ```ignore TODO: unignore once the BasicEmitter has been implemented  /// # use html5tokenizer::{DefaultEmitter, Event, Tokenizer, Token};  /// let emitter = DefaultEmitter::default();  /// let html = "<script><b>"; diff --git a/src/default_emitter.rs b/src/tracing_emitter.rs index 7b6c51e..408d9b0 100644 --- a/src/default_emitter.rs +++ b/src/tracing_emitter.rs @@ -11,7 +11,7 @@ use crate::Emitter;  use crate::Error;  /// The default implementation of [`Emitter`], used to produce tokens. -pub struct DefaultEmitter<O = NoopOffset> { +pub struct TracingEmitter<O = NoopOffset> {      current_token: Option<Token<O>>,      current_attribute_name: String,      current_attr_internal: crate::token::AttrInternal<O>, @@ -21,9 +21,9 @@ pub struct DefaultEmitter<O = NoopOffset> {      attr_in_end_tag_span: Option<Range<O>>,  } -impl<O: Default> Default for DefaultEmitter<O> { +impl<O: Default> Default for TracingEmitter<O> {      fn default() -> Self { -        DefaultEmitter { +        TracingEmitter {              current_token: None,              current_attribute_name: String::new(),              current_attr_internal: Default::default(), @@ -35,14 +35,14 @@ impl<O: Default> Default for DefaultEmitter<O> {      }  } -impl<O> DefaultEmitter<O> { +impl<O> TracingEmitter<O> {      /// Removes all encountered tokenizer errors and returns them as an iterator.      pub fn drain_errors(&mut self) -> impl Iterator<Item = (Error, Range<O>)> + '_ {          self.errors.drain(0..)      }  } -impl<O> Iterator for DefaultEmitter<O> { +impl<O> Iterator for TracingEmitter<O> {      type Item = Token<O>;      fn next(&mut self) -> Option<Self::Item> { @@ -50,7 +50,7 @@ impl<O> Iterator for DefaultEmitter<O> {      }  } -impl<O: Offset> Emitter<O> for DefaultEmitter<O> { +impl<O: Offset> Emitter<O> for TracingEmitter<O> {      fn report_error(&mut self, error: Error, span: Range<O>) {          self.errors.push_back((error, span));      } @@ -274,7 +274,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {      }  } -impl<O> DefaultEmitter<O> { +impl<O> TracingEmitter<O> {      fn emit_token(&mut self, token: Token<O>) {          self.emitted_tokens.push_front(token);      } @@ -309,12 +309,12 @@ impl<O> DefaultEmitter<O> {      }  } -/// The majority of our testing of the [`DefaultEmitter`] is done against the +/// The majority of our testing of the [`TracingEmitter`] is done against the  /// html5lib-tests in the html5lib integration test. This module only tests  /// details that aren't present in the html5lib test data.  #[cfg(test)]  mod tests { -    use super::DefaultEmitter; +    use super::TracingEmitter;      use crate::token::{AttrValueSyntax, Token};      use crate::{Event, Tokenizer}; @@ -322,7 +322,7 @@ mod tests {      fn test_attribute_value_syntax() {          let mut tokenizer = Tokenizer::new(              "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">", -            DefaultEmitter::default(), +            TracingEmitter::default(),          )          .flatten();          let Event::Token(Token::StartTag(tag)) = tokenizer.next().unwrap() else { diff --git a/tests/test_spans.rs b/tests/test_spans.rs index 64cc250..eb93d43 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -20,7 +20,7 @@ use similar_asserts::assert_eq;  type Parser = NaiveParser<      PosTrackingReader<Box<dyn Reader<Error = Infallible>>>,      usize, -    html5tokenizer::DefaultEmitter<usize>, +    html5tokenizer::TracingEmitter<usize>,  >;  fn parser<R>(reader: impl IntoReader<'static, Reader = R>) -> Parser | 
