about | summary | refs | log | tree | commit | diff
path: root/src/tracing_emitter.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/tracing_emitter.rs')
-rw-r--r-- src/tracing_emitter.rs 148
1 files changed, 90 insertions, 58 deletions
diff --git a/src/tracing_emitter.rs b/src/tracing_emitter.rs
index 76b20bf..408e832 100644
--- a/src/tracing_emitter.rs
+++ b/src/tracing_emitter.rs
@@ -3,20 +3,25 @@ use std::collections::BTreeSet;
use std::collections::VecDeque;
use std::ops::Range;
-use crate::let_else::assume;
-use crate::token::{AttrValueSyntax, Comment, Doctype, EndTag, StartTag};
+use crate::let_else::{assume, know};
+use crate::token::{Doctype, EndTag, StartTag, Token};
+use crate::trace::AttributeTrace;
+use crate::trace::AttributeTraceList;
+use crate::trace::{
+ AttrValueSyntax, CommentTrace, DoctypeTrace, EndTagTrace, StartTagTrace, Trace,
+};
use crate::Emitter;
use crate::Error;
-type Token = crate::token::Token<usize>;
-
/// The default implementation of [`Emitter`], used to produce tokens.
pub struct TracingEmitter {
current_token: Option<Token>,
+ current_trace: Option<Trace>,
current_attribute_name: String,
- current_attr_internal: crate::token::AttrInternal<usize>,
+ current_attr_internal: crate::token::AttrInternal,
+ current_attribute_trace: crate::trace::AttributeTrace,
seen_attributes: BTreeSet<String>,
- emitted_tokens: VecDeque<Token>,
+ emitted_tokens: VecDeque<(Token, Trace)>,
errors: VecDeque<(Error, Range<usize>)>,
attr_in_end_tag_span: Option<Range<usize>>,
}
@@ -25,8 +30,10 @@ impl Default for TracingEmitter {
fn default() -> Self {
TracingEmitter {
current_token: None,
+ current_trace: None,
current_attribute_name: String::new(),
current_attr_internal: Default::default(),
+ current_attribute_trace: crate::trace::AttributeTrace::new(),
seen_attributes: BTreeSet::new(),
emitted_tokens: VecDeque::new(),
errors: VecDeque::new(),
@@ -43,7 +50,7 @@ impl TracingEmitter {
}
impl Iterator for TracingEmitter {
- type Item = Token;
+ type Item = (Token, Trace);
fn next(&mut self) -> Option<Self::Item> {
self.emitted_tokens.pop_back()
@@ -56,27 +63,32 @@ impl Emitter<usize> for TracingEmitter {
}
fn emit_char(&mut self, c: char) {
- self.emit_token(Token::Char(c));
+ self.emit_token(Token::Char(c), Trace::Char);
}
fn emit_eof(&mut self) {
- self.emit_token(Token::EndOfFile);
+ self.emit_token(Token::EndOfFile, Trace::EndOfFile);
}
fn init_start_tag(&mut self, tag_offset: usize, name_offset: usize) {
self.current_token = Some(Token::StartTag(StartTag {
- span: tag_offset..0,
self_closing: false,
name: String::new(),
attributes: Default::default(),
+ }));
+ self.current_trace = Some(Trace::StartTag(StartTagTrace {
+ span: tag_offset..0,
name_span: name_offset..0,
+ attribute_traces: AttributeTraceList::new(),
}));
}
fn init_end_tag(&mut self, tag_offset: usize, name_offset: usize) {
self.current_token = Some(Token::EndTag(EndTag {
- span: tag_offset..0,
name: String::new(),
+ }));
+ self.current_trace = Some(Trace::EndTag(EndTagTrace {
+ span: tag_offset..0,
name_span: name_offset..0,
}));
self.seen_attributes.clear();
@@ -93,17 +105,17 @@ impl Emitter<usize> for TracingEmitter {
fn terminate_tag_name(&mut self, offset: usize) {
assume!(
Some(
- Token::StartTag(StartTag { name_span, .. })
- | Token::EndTag(EndTag { name_span, .. })
+ Trace::StartTag(StartTagTrace { name_span, .. })
+ | Trace::EndTag(EndTagTrace { name_span, .. })
),
- &mut self.current_token
+ &mut self.current_trace
);
name_span.end = offset;
}
fn init_attribute_name(&mut self, offset: usize) {
self.flush_current_attribute();
- self.current_attr_internal.name_span.start = offset;
+ self.current_attribute_trace.name_span.start = offset;
}
fn push_attribute_name(&mut self, s: &str) {
@@ -111,12 +123,12 @@ impl Emitter<usize> for TracingEmitter {
}
fn terminate_attribute_name(&mut self, offset: usize) {
- self.current_attr_internal.name_span.end = offset;
+ self.current_attribute_trace.name_span.end = offset;
}
fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: usize) {
- self.current_attr_internal.value_span.start = offset;
- self.current_attr_internal.value_syntax = Some(syntax);
+ self.current_attribute_trace.value_span.start = offset;
+ self.current_attribute_trace.value_syntax = Some(syntax);
}
fn push_attribute_value(&mut self, s: &str) {
@@ -124,7 +136,7 @@ impl Emitter<usize> for TracingEmitter {
}
fn terminate_attribute_value(&mut self, offset: usize) {
- self.current_attr_internal.value_span.end = offset;
+ self.current_attribute_trace.value_span.end = offset;
}
fn set_self_closing(&mut self, slash_span: Range<usize>) {
@@ -144,43 +156,47 @@ impl Emitter<usize> for TracingEmitter {
fn emit_current_tag(&mut self, offset: usize) {
self.flush_current_attribute();
let mut token = self.current_token.take().unwrap();
+ let mut trace = self.current_trace.take().unwrap();
match &mut token {
- Token::EndTag(tag) => {
+ Token::EndTag(_) => {
if !self.seen_attributes.is_empty() {
let span = self.attr_in_end_tag_span.take().unwrap();
self.report_error(Error::EndTagWithAttributes, span);
}
self.seen_attributes.clear();
- tag.span.end = offset;
+ know!(Trace::EndTag(tag_trace), &mut trace);
+ tag_trace.span.end = offset;
}
- Token::StartTag(tag) => {
- tag.span.end = offset;
+ Token::StartTag(_) => {
+ know!(Trace::StartTag(tag_trace), &mut trace);
+ tag_trace.span.end = offset;
}
other => {
debug_assert!(false, "unexpected current_token: {other:?}");
return;
}
}
- self.emit_token(token);
+ self.emit_token(token, trace);
}
fn init_comment(&mut self, data_start_offset: usize) {
- self.current_token = Some(Token::Comment(Comment {
- data: String::new(),
+ self.current_token = Some(Token::Comment(String::new()));
+ self.current_trace = Some(Trace::Comment(CommentTrace {
data_span: data_start_offset..0,
}));
}
fn push_comment(&mut self, s: &str) {
- assume!(Some(Token::Comment(comment)), &mut self.current_token);
- comment.data.push_str(s);
+ assume!(Some(Token::Comment(data)), &mut self.current_token);
+ data.push_str(s);
}
fn emit_current_comment(&mut self, data_end_offset: usize) {
- let mut token = self.current_token.take().unwrap();
- assume!(Token::Comment(comment), &mut token);
- comment.data_span.end = data_end_offset;
- self.emit_token(token);
+ let token = self.current_token.take().unwrap();
+ let mut trace = self.current_trace.take().unwrap();
+ assume!(Trace::Comment(comment_trace), &mut trace);
+ comment_trace.data_span.end = data_end_offset;
+ self.emit_token(token, trace);
}
fn init_doctype(&mut self, offset: usize) {
@@ -189,17 +205,15 @@ impl Emitter<usize> for TracingEmitter {
force_quirks: false,
public_id: None,
system_id: None,
- span: offset..0,
- name_span: 0..0,
- public_id_span: 0..0,
- system_id_span: 0..0,
}));
+ self.current_trace = Some(Trace::Doctype(DoctypeTrace::new(offset)));
}
fn init_doctype_name(&mut self, offset: usize) {
assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
doctype.name = Some("".into());
- doctype.name_span.start = offset;
+ know!(Some(Trace::Doctype(doctype_trace)), &mut self.current_trace);
+ doctype_trace.set_name_start(offset);
}
fn push_doctype_name(&mut self, s: &str) {
@@ -214,14 +228,15 @@ impl Emitter<usize> for TracingEmitter {
}
fn terminate_doctype_name(&mut self, offset: usize) {
- assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
- doctype.name_span.end = offset;
+ assume!(Some(Trace::Doctype(doctype_trace)), &mut self.current_trace);
+ doctype_trace.set_name_end(offset);
}
fn init_doctype_public_id(&mut self, offset: usize) {
assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
doctype.public_id = Some("".to_owned());
- doctype.public_id_span.start = offset;
+ know!(Some(Trace::Doctype(doctype_trace)), &mut self.current_trace);
+ doctype_trace.set_public_id_start(offset);
}
fn push_doctype_public_id(&mut self, s: &str) {
@@ -236,14 +251,15 @@ impl Emitter<usize> for TracingEmitter {
}
fn terminate_doctype_public_id(&mut self, offset: usize) {
- assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
- doctype.public_id_span.end = offset;
+ assume!(Some(Trace::Doctype(doctype_trace)), &mut self.current_trace);
+ doctype_trace.set_public_id_end(offset);
}
fn init_doctype_system_id(&mut self, offset: usize) {
assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
doctype.system_id = Some("".to_owned());
- doctype.system_id_span.start = offset;
+ know!(Some(Trace::Doctype(doctype_trace)), &mut self.current_trace);
+ doctype_trace.set_system_id_start(offset);
}
fn push_doctype_system_id(&mut self, s: &str) {
@@ -258,8 +274,8 @@ impl Emitter<usize> for TracingEmitter {
}
fn terminate_doctype_system_id(&mut self, offset: usize) {
- assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
- doctype.system_id_span.end = offset;
+ assume!(Some(Trace::Doctype(doctype_trace)), &mut self.current_trace);
+ doctype_trace.set_system_id_end(offset);
}
fn set_force_quirks(&mut self) {
@@ -268,15 +284,17 @@ impl Emitter<usize> for TracingEmitter {
}
fn emit_current_doctype(&mut self, offset: usize) {
- assume!(Some(Token::Doctype(mut doctype)), self.current_token.take());
- doctype.span.end = offset;
- self.emit_token(Token::Doctype(doctype));
+ assume!(Some(mut trace), self.current_trace.take());
+ assume!(Trace::Doctype(doctype_trace), &mut trace);
+ doctype_trace.span.end = offset;
+ let token = self.current_token.take().unwrap();
+ self.emit_token(token, trace);
}
}
impl TracingEmitter {
- fn emit_token(&mut self, token: Token) {
- self.emitted_tokens.push_front(token);
+ fn emit_token(&mut self, token: Token, trace: Trace) {
+ self.emitted_tokens.push_front((token, trace));
}
fn flush_current_attribute(&mut self) {
@@ -284,21 +302,26 @@ impl TracingEmitter {
return;
}
let name = std::mem::take(&mut self.current_attribute_name);
- let attr_internal = std::mem::take(&mut self.current_attr_internal);
+ let mut attr_internal = std::mem::take(&mut self.current_attr_internal);
+ let attr_trace =
+ std::mem::replace(&mut self.current_attribute_trace, AttributeTrace::new());
match &mut self.current_token {
Some(Token::StartTag(tag)) => match tag.attributes.inner.entry(name) {
Entry::Vacant(vacant) => {
+ know!(Some(Trace::StartTag(trace)), &mut self.current_trace);
+ let trace_idx = trace.attribute_traces.insert(attr_trace);
+ attr_internal.trace_idx = Some(trace_idx);
vacant.insert(attr_internal);
}
Entry::Occupied(_) => {
- self.report_error(Error::DuplicateAttribute, attr_internal.name_span);
+ self.report_error(Error::DuplicateAttribute, attr_trace.name_span);
}
},
Some(Token::EndTag(_)) => {
- self.attr_in_end_tag_span = Some(attr_internal.name_span.clone());
+ self.attr_in_end_tag_span = Some(attr_trace.name_span.clone());
if !self.seen_attributes.insert(name) {
- self.report_error(Error::DuplicateAttribute, attr_internal.name_span);
+ self.report_error(Error::DuplicateAttribute, attr_trace.name_span);
}
}
other => debug_assert!(false, "unexpected current_token: {other:?}"),
@@ -306,6 +329,12 @@ impl TracingEmitter {
}
}
+impl From<(Token, Trace)> for Token {
+ fn from((token, _): (Token, Trace)) -> Self {
+ token
+ }
+}
+
/// The majority of our testing of the [`TracingEmitter`] is done against the
/// html5lib-tests in the html5lib integration test. This module only tests
/// details that aren't present in the html5lib test data.
@@ -313,8 +342,8 @@ impl TracingEmitter {
mod tests {
use super::TracingEmitter;
use crate::offset::PosTrackingReader;
- use crate::token::{AttrValueSyntax, Token};
- use crate::{Event, Tokenizer};
+ use crate::trace::{AttrValueSyntax, Trace};
+ use crate::{Event, Token, Tokenizer};
#[test]
fn test_attribute_value_syntax() {
@@ -325,7 +354,9 @@ mod tests {
TracingEmitter::default(),
)
.flatten();
- let Event::Token(Token::StartTag(tag)) = tokenizer.next().unwrap() else {
+ let Event::Token((Token::StartTag(tag), Trace::StartTag(tag_trace))) =
+ tokenizer.next().unwrap()
+ else {
panic!("expected start tag");
};
for (name, syntax) in [
@@ -334,8 +365,9 @@ mod tests {
("single-quoted", Some(AttrValueSyntax::SingleQuoted)),
("double-quoted", Some(AttrValueSyntax::DoubleQuoted)),
] {
+ let attr_trace_idx = tag.attributes.get(name).unwrap().trace_idx().unwrap();
assert_eq!(
- tag.attributes.get(name).unwrap().value_syntax(),
+ tag_trace.attribute_traces[attr_trace_idx].value_syntax(),
syntax,
"unexpected value for attribute {name}"
);