-rw-r--r--  integration_tests/tests/test_html5lib.rs   13
-rw-r--r--  src/tracing_emitter.rs                    102
-rw-r--r--  tests/test_spans.rs                          2
3 files changed, 60 insertions, 57 deletions
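
The commit drops TracingEmitter's offset type parameter: the emitter now always implements Emitter<usize>, so every call site has to feed it byte offsets, typically by wrapping the reader in PosTrackingReader. A minimal construction sketch, assuming only the API shown in the hunks below (the input string is illustrative):

    use html5tokenizer::{offset::PosTrackingReader, Tokenizer, TracingEmitter};

    fn main() {
        // The reader must report byte positions now that the emitter's
        // offset type is fixed to usize; PosTrackingReader adds that.
        let tokenizer = Tokenizer::new(
            PosTrackingReader::new("<p class=\"intro\">hello</p>"),
            TracingEmitter::default(),
        );

        // `.flatten()` mirrors the pattern used in the tests below;
        // here we only count the yielded events.
        assert!(tokenizer.flatten().count() > 0);
    }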
diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs
index 0040a01..eac11dd 100644
--- a/integration_tests/tests/test_html5lib.rs
+++ b/integration_tests/tests/test_html5lib.rs
@@ -4,7 +4,7 @@ use html5lib_tests::{
parse_tests, Error as TestError, InitialState, Output, Test, Token as TestToken,
};
use html5tokenizer::{
- offset::{Offset, Position},
+ offset::{Offset, PosTrackingReader, Position},
reader::Reader,
CdataAction, Emitter, Error, Event, InternalState, Token, Tokenizer, TracingEmitter,
};
@@ -75,7 +75,10 @@ fn run_test(fname: &str, test_i: usize, test: Test) {
test_i,
&test,
state,
- Tokenizer::new(&test.input, TracingEmitter::default()),
+ Tokenizer::new(
+ PosTrackingReader::new(&test.input),
+ TracingEmitter::default(),
+ ),
"TracingEmitter string",
);
@@ -85,7 +88,7 @@ fn run_test(fname: &str, test_i: usize, test: Test) {
&test,
state,
Tokenizer::new(
- BufReader::new(test.input.as_bytes()),
+ PosTrackingReader::new(BufReader::new(test.input.as_bytes())),
TracingEmitter::default(),
),
"TracingEmitter bufread",
@@ -183,8 +186,8 @@ trait DrainErrors<O> {
fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_>;
}
-impl<O> DrainErrors<O> for TracingEmitter<O> {
- fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<O>)> + '_> {
+impl DrainErrors<usize> for TracingEmitter {
+ fn drain_errors(&mut self) -> Box<dyn Iterator<Item = (Error, Range<usize>)> + '_> {
Box::new(self.drain_errors())
}
}
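
With the emitter no longer generic, the DrainErrors shim above is implemented for usize only, so downstream code sees concrete byte ranges. A hedged sketch of a caller-side helper (the function is illustrative, not part of the crate):

    use std::ops::Range;
    use html5tokenizer::{Error, TracingEmitter};

    // Illustrative helper: pull the spanned errors a TracingEmitter has
    // accumulated. After this commit the spans are plain byte ranges.
    fn spanned_errors(emitter: &mut TracingEmitter) -> Vec<(Error, Range<usize>)> {
        emitter.drain_errors().collect()
    }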
diff --git a/src/tracing_emitter.rs b/src/tracing_emitter.rs
index 408d9b0..76b20bf 100644
--- a/src/tracing_emitter.rs
+++ b/src/tracing_emitter.rs
@@ -4,24 +4,24 @@ use std::collections::VecDeque;
use std::ops::Range;
use crate::let_else::assume;
-use crate::offset::NoopOffset;
-use crate::offset::Offset;
-use crate::token::{AttrValueSyntax, Comment, Doctype, EndTag, StartTag, Token};
+use crate::token::{AttrValueSyntax, Comment, Doctype, EndTag, StartTag};
use crate::Emitter;
use crate::Error;
+type Token = crate::token::Token<usize>;
+
/// The default implementation of [`Emitter`], used to produce tokens.
-pub struct TracingEmitter<O = NoopOffset> {
- current_token: Option<Token<O>>,
+pub struct TracingEmitter {
+ current_token: Option<Token>,
current_attribute_name: String,
- current_attr_internal: crate::token::AttrInternal<O>,
+ current_attr_internal: crate::token::AttrInternal<usize>,
seen_attributes: BTreeSet<String>,
- emitted_tokens: VecDeque<Token<O>>,
- errors: VecDeque<(Error, Range<O>)>,
- attr_in_end_tag_span: Option<Range<O>>,
+ emitted_tokens: VecDeque<Token>,
+ errors: VecDeque<(Error, Range<usize>)>,
+ attr_in_end_tag_span: Option<Range<usize>>,
}
-impl<O: Default> Default for TracingEmitter<O> {
+impl Default for TracingEmitter {
fn default() -> Self {
TracingEmitter {
current_token: None,
@@ -35,23 +35,23 @@ impl<O: Default> Default for TracingEmitter<O> {
}
}
-impl<O> TracingEmitter<O> {
+impl TracingEmitter {
/// Removes all encountered tokenizer errors and returns them as an iterator.
- pub fn drain_errors(&mut self) -> impl Iterator<Item = (Error, Range<O>)> + '_ {
+ pub fn drain_errors(&mut self) -> impl Iterator<Item = (Error, Range<usize>)> + '_ {
self.errors.drain(0..)
}
}
-impl<O> Iterator for TracingEmitter<O> {
- type Item = Token<O>;
+impl Iterator for TracingEmitter {
+ type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
self.emitted_tokens.pop_back()
}
}
-impl<O: Offset> Emitter<O> for TracingEmitter<O> {
- fn report_error(&mut self, error: Error, span: Range<O>) {
+impl Emitter<usize> for TracingEmitter {
+ fn report_error(&mut self, error: Error, span: Range<usize>) {
self.errors.push_back((error, span));
}
@@ -63,21 +63,21 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
self.emit_token(Token::EndOfFile);
}
- fn init_start_tag(&mut self, tag_offset: O, name_offset: O) {
+ fn init_start_tag(&mut self, tag_offset: usize, name_offset: usize) {
self.current_token = Some(Token::StartTag(StartTag {
- span: tag_offset..O::default(),
+ span: tag_offset..0,
self_closing: false,
name: String::new(),
attributes: Default::default(),
- name_span: name_offset..O::default(),
+ name_span: name_offset..0,
}));
}
- fn init_end_tag(&mut self, tag_offset: O, name_offset: O) {
+ fn init_end_tag(&mut self, tag_offset: usize, name_offset: usize) {
self.current_token = Some(Token::EndTag(EndTag {
- span: tag_offset..O::default(),
+ span: tag_offset..0,
name: String::new(),
- name_span: name_offset..O::default(),
+ name_span: name_offset..0,
}));
self.seen_attributes.clear();
}
@@ -90,7 +90,7 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
name.push_str(s);
}
- fn terminate_tag_name(&mut self, offset: O) {
+ fn terminate_tag_name(&mut self, offset: usize) {
assume!(
Some(
Token::StartTag(StartTag { name_span, .. })
@@ -101,7 +101,7 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
name_span.end = offset;
}
- fn init_attribute_name(&mut self, offset: O) {
+ fn init_attribute_name(&mut self, offset: usize) {
self.flush_current_attribute();
self.current_attr_internal.name_span.start = offset;
}
@@ -110,11 +110,11 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
self.current_attribute_name.push_str(s);
}
- fn terminate_attribute_name(&mut self, offset: O) {
+ fn terminate_attribute_name(&mut self, offset: usize) {
self.current_attr_internal.name_span.end = offset;
}
- fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {
+ fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: usize) {
self.current_attr_internal.value_span.start = offset;
self.current_attr_internal.value_syntax = Some(syntax);
}
@@ -123,11 +123,11 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
self.current_attr_internal.value.push_str(s);
}
- fn terminate_attribute_value(&mut self, offset: O) {
+ fn terminate_attribute_value(&mut self, offset: usize) {
self.current_attr_internal.value_span.end = offset;
}
- fn set_self_closing(&mut self, slash_span: Range<O>) {
+ fn set_self_closing(&mut self, slash_span: Range<usize>) {
let token = self.current_token.as_mut().unwrap();
match token {
@@ -141,7 +141,7 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
}
}
- fn emit_current_tag(&mut self, offset: O) {
+ fn emit_current_tag(&mut self, offset: usize) {
self.flush_current_attribute();
let mut token = self.current_token.take().unwrap();
match &mut token {
@@ -164,10 +164,10 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
self.emit_token(token);
}
- fn init_comment(&mut self, data_start_offset: O) {
+ fn init_comment(&mut self, data_start_offset: usize) {
self.current_token = Some(Token::Comment(Comment {
data: String::new(),
- data_span: data_start_offset..O::default(),
+ data_span: data_start_offset..0,
}));
}
@@ -176,27 +176,27 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
comment.data.push_str(s);
}
- fn emit_current_comment(&mut self, data_end_offset: O) {
+ fn emit_current_comment(&mut self, data_end_offset: usize) {
let mut token = self.current_token.take().unwrap();
assume!(Token::Comment(comment), &mut token);
comment.data_span.end = data_end_offset;
self.emit_token(token);
}
- fn init_doctype(&mut self, offset: O) {
+ fn init_doctype(&mut self, offset: usize) {
self.current_token = Some(Token::Doctype(Doctype {
name: None,
force_quirks: false,
public_id: None,
system_id: None,
- span: offset..O::default(),
- name_span: O::default()..O::default(),
- public_id_span: O::default()..O::default(),
- system_id_span: O::default()..O::default(),
+ span: offset..0,
+ name_span: 0..0,
+ public_id_span: 0..0,
+ system_id_span: 0..0,
}));
}
- fn init_doctype_name(&mut self, offset: O) {
+ fn init_doctype_name(&mut self, offset: usize) {
assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
doctype.name = Some("".into());
doctype.name_span.start = offset;
@@ -213,12 +213,12 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
name.push_str(s);
}
- fn terminate_doctype_name(&mut self, offset: O) {
+ fn terminate_doctype_name(&mut self, offset: usize) {
assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
doctype.name_span.end = offset;
}
- fn init_doctype_public_id(&mut self, offset: O) {
+ fn init_doctype_public_id(&mut self, offset: usize) {
assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
doctype.public_id = Some("".to_owned());
doctype.public_id_span.start = offset;
@@ -235,12 +235,12 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
public_id.push_str(s);
}
- fn terminate_doctype_public_id(&mut self, offset: O) {
+ fn terminate_doctype_public_id(&mut self, offset: usize) {
assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
doctype.public_id_span.end = offset;
}
- fn init_doctype_system_id(&mut self, offset: O) {
+ fn init_doctype_system_id(&mut self, offset: usize) {
assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
doctype.system_id = Some("".to_owned());
doctype.system_id_span.start = offset;
@@ -257,7 +257,7 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
id.push_str(s);
}
- fn terminate_doctype_system_id(&mut self, offset: O) {
+ fn terminate_doctype_system_id(&mut self, offset: usize) {
assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
doctype.system_id_span.end = offset;
}
@@ -267,22 +267,19 @@ impl<O: Offset> Emitter<O> for TracingEmitter<O> {
doctype.force_quirks = true;
}
- fn emit_current_doctype(&mut self, offset: O) {
+ fn emit_current_doctype(&mut self, offset: usize) {
assume!(Some(Token::Doctype(mut doctype)), self.current_token.take());
doctype.span.end = offset;
self.emit_token(Token::Doctype(doctype));
}
}
-impl<O> TracingEmitter<O> {
- fn emit_token(&mut self, token: Token<O>) {
+impl TracingEmitter {
+ fn emit_token(&mut self, token: Token) {
self.emitted_tokens.push_front(token);
}
- fn flush_current_attribute(&mut self)
- where
- O: Offset,
- {
+ fn flush_current_attribute(&mut self) {
if self.current_attribute_name.is_empty() {
return;
}
@@ -315,13 +312,16 @@ impl<O> TracingEmitter<O> {
#[cfg(test)]
mod tests {
use super::TracingEmitter;
+ use crate::offset::PosTrackingReader;
use crate::token::{AttrValueSyntax, Token};
use crate::{Event, Tokenizer};
#[test]
fn test_attribute_value_syntax() {
let mut tokenizer = Tokenizer::new(
- "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">",
+ PosTrackingReader::new(
+ "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">",
+ ),
TracingEmitter::default(),
)
.flatten();
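
Since the emitter's internal Token alias is now crate::token::Token<usize>, the tokens it buffers carry plain byte spans. A hedged sketch of filtering start tags out of such a token stream, assuming the root re-export of Token keeps its offset parameter:

    use std::ops::Range;
    use html5tokenizer::Token;

    // Illustrative only: collect each start tag's name together with the
    // byte range of that name in the input.
    fn start_tag_names(
        tokens: impl Iterator<Item = Token<usize>>,
    ) -> Vec<(String, Range<usize>)> {
        tokens
            .filter_map(|token| match token {
                Token::StartTag(tag) => Some((tag.name, tag.name_span)),
                _ => None,
            })
            .collect()
    }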
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
index fdb9a78..71a6c4b 100644
--- a/tests/test_spans.rs
+++ b/tests/test_spans.rs
@@ -20,7 +20,7 @@ use similar_asserts::assert_eq;
type Parser = NaiveParser<
PosTrackingReader<Box<dyn Reader<Error = Infallible>>>,
usize,
- html5tokenizer::TracingEmitter<usize>,
+ html5tokenizer::TracingEmitter,
>;
fn parser<R>(reader: impl IntoReader<'static, Reader = R>) -> Parser
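
For downstream code the net effect is that type annotations mentioning the emitter lose their offset parameter, exactly as the Parser alias above does. A before/after sketch (the alias name is made up):

    // Before this commit the offset type had to be spelled out:
    //     type SpanEmitter = html5tokenizer::TracingEmitter<usize>;
    // Afterwards the emitter is non-generic and always uses usize offsets:
    type SpanEmitter = html5tokenizer::TracingEmitter;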