-rw-r--r--   integration_tests/tests/test_html5lib.rs |  13
-rw-r--r--   src/tracing_emitter.rs                   | 102
-rw-r--r--   tests/test_spans.rs                      |   2

3 files changed, 60 insertions, 57 deletions
diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs
index 0040a01..eac11dd 100644
--- a/integration_tests/tests/test_html5lib.rs
+++ b/integration_tests/tests/test_html5lib.rs
@@ -4,7 +4,7 @@ use html5lib_tests::{
     parse_tests, Error as TestError, InitialState, Output, Test, Token as TestToken,
 };
 use html5tokenizer::{
-    offset::{Offset, Position},
+    offset::{Offset, PosTrackingReader, Position},
     reader::Reader,
     CdataAction, Emitter, Error, Event, InternalState, Token, Tokenizer, TracingEmitter,
 };
@@ -75,7 +75,10 @@ fn run_test(fname: &str, test_i: usize, test: Test) {
             test_i,
             &test,
             state,
-            Tokenizer::new(&test.input, TracingEmitter::default()),
+            Tokenizer::new(
+                PosTrackingReader::new(&test.input),
+                TracingEmitter::default(),
+            ),
             "TracingEmitter string",
         );
 
@@ -85,7 +88,7 @@ fn run_test(fname: &str, test_i: usize, test: Test) {
             &test,
             state,
             Tokenizer::new(
-                BufReader::new(test.input.as_bytes()),
+                PosTrackingReader::new(BufReader::new(test.input.as_bytes())),
                 TracingEmitter::default(),
             ),
             "TracingEmitter bufread",
@@ -183,8 +186,8 @@ trait DrainErrors<O> {
     fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_>;
 }
 
-impl<O> DrainErrors<O> for TracingEmitter<O> {
-    fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_> {
+impl DrainErrors<usize> for TracingEmitter {
+    fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<usize>)> + '_> {
         Box::new(self.drain_errors())
     }
 }
diff --git a/src/tracing_emitter.rs b/src/tracing_emitter.rs
index 408d9b0..76b20bf 100644
--- a/src/tracing_emitter.rs
+++ b/src/tracing_emitter.rs
@@ -4,24 +4,24 @@ use std::collections::VecDeque;
 use std::ops::Range;
 
 use crate::let_else::assume;
-use crate::offset::NoopOffset;
-use crate::offset::Offset;
-use crate::token::{AttrValueSyntax, Comment, Doctype, EndTag, StartTag, Token};
+use crate::token::{AttrValueSyntax, Comment, Doctype, EndTag, StartTag};
 use crate::Emitter;
 use crate::Error;
 
+type Token = crate::token::Token<usize>;
+
 /// The default implementation of [`Emitter`], used to produce tokens.
-pub struct TracingEmitter<O = NoopOffset> {
-    current_token: Option<Token<O>>,
+pub struct TracingEmitter {
+    current_token: Option<Token>,
     current_attribute_name: String,
-    current_attr_internal: crate::token::AttrInternal<O>,
+    current_attr_internal: crate::token::AttrInternal<usize>,
     seen_attributes: BTreeSet<String>,
-    emitted_tokens: VecDeque<Token<O>>,
-    errors: VecDeque<(Error, Range<O>)>,
-    attr_in_end_tag_span: Option<Range<O>>,
+    emitted_tokens: VecDeque<Token>,
+    errors: VecDeque<(Error, Range<usize>)>,
+    attr_in_end_tag_span: Option<Range<usize>>,
 }
 
-impl<O: Default> Default for TracingEmitter<O> {
+impl Default for TracingEmitter {
     fn default() -> Self {
         TracingEmitter {
             current_token: None,
@@ -35,23 +35,23 @@ impl<O: Default> Default for TracingEmitter<O> {
     }
 }
 
-impl<O> TracingEmitter<O> {
+impl TracingEmitter {
     /// Removes all encountered tokenizer errors and returns them as an iterator.
-    pub fn drain_errors(&mut self) -> impl Iterator<Item = (Error, Range<O>)> + '_ {
+    pub fn drain_errors(&mut self) -> impl Iterator<Item = (Error, Range<usize>)> + '_ {
         self.errors.drain(0..)
     }
 }
 
-impl<O> Iterator for TracingEmitter<O> {
-    type Item = Token<O>;
+impl Iterator for TracingEmitter {
+    type Item = Token;
 
     fn next(&mut self) -> Option<Self::Item> {
         self.emitted_tokens.pop_back()
     }
 }
 
-impl<O: Offset> Emitter<O> for TracingEmitter<O> {
-    fn report_error(&mut self, error: Error, span: Range<O>) {
+impl Emitter<usize> for TracingEmitter {
+    fn report_error(&mut self, error: Error, span: Range<usize>) {
         self.errors.push_back((error, span));
     }
 
@@ -63,21 +63,21 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
         self.emit_token(Token::EndOfFile);
     }
 
-    fn init_start_tag(&mut self, tag_offset: O, name_offset: O) {
+    fn init_start_tag(&mut self, tag_offset: usize, name_offset: usize) {
         self.current_token = Some(Token::StartTag(StartTag {
-            span: tag_offset..O::default(),
+            span: tag_offset..0,
             self_closing: false,
             name: String::new(),
             attributes: Default::default(),
-            name_span: name_offset..O::default(),
+            name_span: name_offset..0,
         }));
     }
 
-    fn init_end_tag(&mut self, tag_offset: O, name_offset: O) {
+    fn init_end_tag(&mut self, tag_offset: usize, name_offset: usize) {
         self.current_token = Some(Token::EndTag(EndTag {
-            span: tag_offset..O::default(),
+            span: tag_offset..0,
             name: String::new(),
-            name_span: name_offset..O::default(),
+            name_span: name_offset..0,
         }));
         self.seen_attributes.clear();
     }
@@ -90,7 +90,7 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
         name.push_str(s);
     }
 
-    fn terminate_tag_name(&mut self, offset: O) {
+    fn terminate_tag_name(&mut self, offset: usize) {
         assume!(
             Some(
                 Token::StartTag(StartTag { name_span, .. })
@@ -101,7 +101,7 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
         name_span.end = offset;
     }
 
-    fn init_attribute_name(&mut self, offset: O) {
+    fn init_attribute_name(&mut self, offset: usize) {
         self.flush_current_attribute();
         self.current_attr_internal.name_span.start = offset;
     }
@@ -110,11 +110,11 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
         self.current_attribute_name.push_str(s);
     }
 
-    fn terminate_attribute_name(&mut self, offset: O) {
+    fn terminate_attribute_name(&mut self, offset: usize) {
         self.current_attr_internal.name_span.end = offset;
     }
 
-    fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {
+    fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: usize) {
         self.current_attr_internal.value_span.start = offset;
         self.current_attr_internal.value_syntax = Some(syntax);
     }
@@ -123,11 +123,11 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
         self.current_attr_internal.value.push_str(s);
     }
 
-    fn terminate_attribute_value(&mut self, offset: O) {
+    fn terminate_attribute_value(&mut self, offset: usize) {
         self.current_attr_internal.value_span.end = offset;
     }
 
-    fn set_self_closing(&mut self, slash_span: Range<O>) {
+    fn set_self_closing(&mut self, slash_span: Range<usize>) {
         let token = self.current_token.as_mut().unwrap();
 
         match token {
@@ -141,7 +141,7 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
         }
     }
 
-    fn emit_current_tag(&mut self, offset: O) {
+    fn emit_current_tag(&mut self, offset: usize) {
         self.flush_current_attribute();
         let mut token = self.current_token.take().unwrap();
         match &mut token {
@@ -164,10 +164,10 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
         self.emit_token(token);
     }
 
-    fn init_comment(&mut self, data_start_offset: O) {
+    fn init_comment(&mut self, data_start_offset: usize) {
         self.current_token = Some(Token::Comment(Comment {
             data: String::new(),
-            data_span: data_start_offset..O::default(),
+            data_span: data_start_offset..0,
         }));
     }
 
@@ -176,27 +176,27 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
         comment.data.push_str(s);
     }
 
-    fn emit_current_comment(&mut self, data_end_offset: O) {
+    fn emit_current_comment(&mut self, data_end_offset: usize) {
         let mut token = self.current_token.take().unwrap();
         assume!(Token::Comment(comment), &mut token);
         comment.data_span.end = data_end_offset;
         self.emit_token(token);
     }
 
-    fn init_doctype(&mut self, offset: O) {
+    fn init_doctype(&mut self, offset: usize) {
         self.current_token = Some(Token::Doctype(Doctype {
             name: None,
             force_quirks: false,
             public_id: None,
             system_id: None,
-            span: offset..O::default(),
-            name_span: O::default()..O::default(),
-            public_id_span: O::default()..O::default(),
-            system_id_span: O::default()..O::default(),
+            span: offset..0,
+            name_span: 0..0,
+            public_id_span: 0..0,
+            system_id_span: 0..0,
         }));
     }
 
-    fn init_doctype_name(&mut self, offset: O) {
+    fn init_doctype_name(&mut self, offset: usize) {
         assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
         doctype.name = Some("".into());
         doctype.name_span.start = offset;
@@ -213,12 +213,12 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
         name.push_str(s);
     }
 
-    fn terminate_doctype_name(&mut self, offset: O) {
+    fn terminate_doctype_name(&mut self, offset: usize) {
         assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
         doctype.name_span.end = offset;
     }
 
-    fn init_doctype_public_id(&mut self, offset: O) {
+    fn init_doctype_public_id(&mut self, offset: usize) {
         assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
         doctype.public_id = Some("".to_owned());
         doctype.public_id_span.start = offset;
@@ -235,12 +235,12 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
         public_id.push_str(s);
     }
 
-    fn terminate_doctype_public_id(&mut self, offset: O) {
+    fn terminate_doctype_public_id(&mut self, offset: usize) {
         assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
         doctype.public_id_span.end = offset;
     }
 
-    fn init_doctype_system_id(&mut self, offset: O) {
+    fn init_doctype_system_id(&mut self, offset: usize) {
         assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
         doctype.system_id = Some("".to_owned());
         doctype.system_id_span.start = offset;
@@ -257,7 +257,7 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
         id.push_str(s);
     }
 
-    fn terminate_doctype_system_id(&mut self, offset: O) {
+    fn terminate_doctype_system_id(&mut self, offset: usize) {
         assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
         doctype.system_id_span.end = offset;
     }
@@ -267,22 +267,19 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
         doctype.force_quirks = true;
     }
 
-    fn emit_current_doctype(&mut self, offset: O) {
+    fn emit_current_doctype(&mut self, offset: usize) {
         assume!(Some(Token::Doctype(mut doctype)), self.current_token.take());
         doctype.span.end = offset;
         self.emit_token(Token::Doctype(doctype));
     }
 }
 
-impl<O> TracingEmitter<O> {
-    fn emit_token(&mut self, token: Token<O>) {
+impl TracingEmitter {
+    fn emit_token(&mut self, token: Token) {
         self.emitted_tokens.push_front(token);
     }
 
-    fn flush_current_attribute(&mut self)
-    where
-        O: Offset,
-    {
+    fn flush_current_attribute(&mut self) {
         if self.current_attribute_name.is_empty() {
             return;
         }
@@ -315,13 +312,16 @@ impl<O> TracingEmitter<O> {
 #[cfg(test)]
 mod tests {
     use super::TracingEmitter;
+    use crate::offset::PosTrackingReader;
     use crate::token::{AttrValueSyntax, Token};
     use crate::{Event, Tokenizer};
 
     #[test]
     fn test_attribute_value_syntax() {
         let mut tokenizer = Tokenizer::new(
-            "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">",
+            PosTrackingReader::new(
+                "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">",
+            ),
             TracingEmitter::default(),
         )
         .flatten();
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
index fdb9a78..71a6c4b 100644
--- a/tests/test_spans.rs
+++ b/tests/test_spans.rs
@@ -20,7 +20,7 @@ use similar_asserts::assert_eq;
 type Parser = NaiveParser<
     PosTrackingReader<Box<dyn Reader<Error = Infallible>>>,
     usize,
-    html5tokenizer::TracingEmitter<usize>,
+    html5tokenizer::TracingEmitter,
 >;
 
 fn parser<R>(reader: impl IntoReader<'static, Reader = R>) -> Parser
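
For reference, a minimal sketch of what this change means for calling code, based on the construction pattern in the updated tests (the input string and the event handling are illustrative, not part of the diff): TracingEmitter is no longer generic over an offset type, so the reader passed to Tokenizer must produce usize positions, e.g. by wrapping the input in PosTrackingReader.

use html5tokenizer::{offset::PosTrackingReader, Tokenizer, TracingEmitter};

fn main() {
    // Before this change, a plain &str reader (NoopOffset) could be paired
    // with TracingEmitter<NoopOffset>. After it, the emitter is fixed to
    // usize offsets, so the input is wrapped in PosTrackingReader, mirroring
    // the updated tests in src/tracing_emitter.rs and test_html5lib.rs.
    let tokenizer = Tokenizer::new(
        PosTrackingReader::new("<div empty unquoted=foo>"),
        TracingEmitter::default(),
    );

    for event in tokenizer.flatten() {
        // Handle Event values here; spans carried by tokens and error ranges
        // are now Range<usize> byte offsets into the input.
        let _ = event;
    }
}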
