diff options
Diffstat (limited to 'src/tracing_emitter.rs')
-rw-r--r-- | src/tracing_emitter.rs | 148 |
1 files changed, 90 insertions, 58 deletions
diff --git a/src/tracing_emitter.rs b/src/tracing_emitter.rs index 76b20bf..408e832 100644 --- a/src/tracing_emitter.rs +++ b/src/tracing_emitter.rs @@ -3,20 +3,25 @@ use std::collections::BTreeSet; use std::collections::VecDeque; use std::ops::Range; -use crate::let_else::assume; -use crate::token::{AttrValueSyntax, Comment, Doctype, EndTag, StartTag}; +use crate::let_else::{assume, know}; +use crate::token::{Doctype, EndTag, StartTag, Token}; +use crate::trace::AttributeTrace; +use crate::trace::AttributeTraceList; +use crate::trace::{ + AttrValueSyntax, CommentTrace, DoctypeTrace, EndTagTrace, StartTagTrace, Trace, +}; use crate::Emitter; use crate::Error; -type Token = crate::token::Token<usize>; - /// The default implementation of [`Emitter`], used to produce tokens. pub struct TracingEmitter { current_token: Option<Token>, + current_trace: Option<Trace>, current_attribute_name: String, - current_attr_internal: crate::token::AttrInternal<usize>, + current_attr_internal: crate::token::AttrInternal, + current_attribute_trace: crate::trace::AttributeTrace, seen_attributes: BTreeSet<String>, - emitted_tokens: VecDeque<Token>, + emitted_tokens: VecDeque<(Token, Trace)>, errors: VecDeque<(Error, Range<usize>)>, attr_in_end_tag_span: Option<Range<usize>>, } @@ -25,8 +30,10 @@ impl Default for TracingEmitter { fn default() -> Self { TracingEmitter { current_token: None, + current_trace: None, current_attribute_name: String::new(), current_attr_internal: Default::default(), + current_attribute_trace: crate::trace::AttributeTrace::new(), seen_attributes: BTreeSet::new(), emitted_tokens: VecDeque::new(), errors: VecDeque::new(), @@ -43,7 +50,7 @@ impl TracingEmitter { } impl Iterator for TracingEmitter { - type Item = Token; + type Item = (Token, Trace); fn next(&mut self) -> Option<Self::Item> { self.emitted_tokens.pop_back() @@ -56,27 +63,32 @@ impl Emitter<usize> for TracingEmitter { } fn emit_char(&mut self, c: char) { - self.emit_token(Token::Char(c)); + self.emit_token(Token::Char(c), Trace::Char); } fn emit_eof(&mut self) { - self.emit_token(Token::EndOfFile); + self.emit_token(Token::EndOfFile, Trace::EndOfFile); } fn init_start_tag(&mut self, tag_offset: usize, name_offset: usize) { self.current_token = Some(Token::StartTag(StartTag { - span: tag_offset..0, self_closing: false, name: String::new(), attributes: Default::default(), + })); + self.current_trace = Some(Trace::StartTag(StartTagTrace { + span: tag_offset..0, name_span: name_offset..0, + attribute_traces: AttributeTraceList::new(), })); } fn init_end_tag(&mut self, tag_offset: usize, name_offset: usize) { self.current_token = Some(Token::EndTag(EndTag { - span: tag_offset..0, name: String::new(), + })); + self.current_trace = Some(Trace::EndTag(EndTagTrace { + span: tag_offset..0, name_span: name_offset..0, })); self.seen_attributes.clear(); @@ -93,17 +105,17 @@ impl Emitter<usize> for TracingEmitter { fn terminate_tag_name(&mut self, offset: usize) { assume!( Some( - Token::StartTag(StartTag { name_span, .. }) - | Token::EndTag(EndTag { name_span, .. }) + Trace::StartTag(StartTagTrace { name_span, .. }) + | Trace::EndTag(EndTagTrace { name_span, .. }) ), - &mut self.current_token + &mut self.current_trace ); name_span.end = offset; } fn init_attribute_name(&mut self, offset: usize) { self.flush_current_attribute(); - self.current_attr_internal.name_span.start = offset; + self.current_attribute_trace.name_span.start = offset; } fn push_attribute_name(&mut self, s: &str) { @@ -111,12 +123,12 @@ impl Emitter<usize> for TracingEmitter { } fn terminate_attribute_name(&mut self, offset: usize) { - self.current_attr_internal.name_span.end = offset; + self.current_attribute_trace.name_span.end = offset; } fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: usize) { - self.current_attr_internal.value_span.start = offset; - self.current_attr_internal.value_syntax = Some(syntax); + self.current_attribute_trace.value_span.start = offset; + self.current_attribute_trace.value_syntax = Some(syntax); } fn push_attribute_value(&mut self, s: &str) { @@ -124,7 +136,7 @@ impl Emitter<usize> for TracingEmitter { } fn terminate_attribute_value(&mut self, offset: usize) { - self.current_attr_internal.value_span.end = offset; + self.current_attribute_trace.value_span.end = offset; } fn set_self_closing(&mut self, slash_span: Range<usize>) { @@ -144,43 +156,47 @@ impl Emitter<usize> for TracingEmitter { fn emit_current_tag(&mut self, offset: usize) { self.flush_current_attribute(); let mut token = self.current_token.take().unwrap(); + let mut trace = self.current_trace.take().unwrap(); match &mut token { - Token::EndTag(tag) => { + Token::EndTag(_) => { if !self.seen_attributes.is_empty() { let span = self.attr_in_end_tag_span.take().unwrap(); self.report_error(Error::EndTagWithAttributes, span); } self.seen_attributes.clear(); - tag.span.end = offset; + know!(Trace::EndTag(tag_trace), &mut trace); + tag_trace.span.end = offset; } - Token::StartTag(tag) => { - tag.span.end = offset; + Token::StartTag(_) => { + know!(Trace::StartTag(tag_trace), &mut trace); + tag_trace.span.end = offset; } other => { debug_assert!(false, "unexpected current_token: {other:?}"); return; } } - self.emit_token(token); + self.emit_token(token, trace); } fn init_comment(&mut self, data_start_offset: usize) { - self.current_token = Some(Token::Comment(Comment { - data: String::new(), + self.current_token = Some(Token::Comment(String::new())); + self.current_trace = Some(Trace::Comment(CommentTrace { data_span: data_start_offset..0, })); } fn push_comment(&mut self, s: &str) { - assume!(Some(Token::Comment(comment)), &mut self.current_token); - comment.data.push_str(s); + assume!(Some(Token::Comment(data)), &mut self.current_token); + data.push_str(s); } fn emit_current_comment(&mut self, data_end_offset: usize) { - let mut token = self.current_token.take().unwrap(); - assume!(Token::Comment(comment), &mut token); - comment.data_span.end = data_end_offset; - self.emit_token(token); + let token = self.current_token.take().unwrap(); + let mut trace = self.current_trace.take().unwrap(); + assume!(Trace::Comment(comment_trace), &mut trace); + comment_trace.data_span.end = data_end_offset; + self.emit_token(token, trace); } fn init_doctype(&mut self, offset: usize) { @@ -189,17 +205,15 @@ impl Emitter<usize> for TracingEmitter { force_quirks: false, public_id: None, system_id: None, - span: offset..0, - name_span: 0..0, - public_id_span: 0..0, - system_id_span: 0..0, })); + self.current_trace = Some(Trace::Doctype(DoctypeTrace::new(offset))); } fn init_doctype_name(&mut self, offset: usize) { assume!(Some(Token::Doctype(doctype)), &mut self.current_token); doctype.name = Some("".into()); - doctype.name_span.start = offset; + know!(Some(Trace::Doctype(doctype_trace)), &mut self.current_trace); + doctype_trace.set_name_start(offset); } fn push_doctype_name(&mut self, s: &str) { @@ -214,14 +228,15 @@ impl Emitter<usize> for TracingEmitter { } fn terminate_doctype_name(&mut self, offset: usize) { - assume!(Some(Token::Doctype(doctype)), &mut self.current_token); - doctype.name_span.end = offset; + assume!(Some(Trace::Doctype(doctype_trace)), &mut self.current_trace); + doctype_trace.set_name_end(offset); } fn init_doctype_public_id(&mut self, offset: usize) { assume!(Some(Token::Doctype(doctype)), &mut self.current_token); doctype.public_id = Some("".to_owned()); - doctype.public_id_span.start = offset; + know!(Some(Trace::Doctype(doctype_trace)), &mut self.current_trace); + doctype_trace.set_public_id_start(offset); } fn push_doctype_public_id(&mut self, s: &str) { @@ -236,14 +251,15 @@ impl Emitter<usize> for TracingEmitter { } fn terminate_doctype_public_id(&mut self, offset: usize) { - assume!(Some(Token::Doctype(doctype)), &mut self.current_token); - doctype.public_id_span.end = offset; + assume!(Some(Trace::Doctype(doctype_trace)), &mut self.current_trace); + doctype_trace.set_public_id_end(offset); } fn init_doctype_system_id(&mut self, offset: usize) { assume!(Some(Token::Doctype(doctype)), &mut self.current_token); doctype.system_id = Some("".to_owned()); - doctype.system_id_span.start = offset; + know!(Some(Trace::Doctype(doctype_trace)), &mut self.current_trace); + doctype_trace.set_system_id_start(offset); } fn push_doctype_system_id(&mut self, s: &str) { @@ -258,8 +274,8 @@ impl Emitter<usize> for TracingEmitter { } fn terminate_doctype_system_id(&mut self, offset: usize) { - assume!(Some(Token::Doctype(doctype)), &mut self.current_token); - doctype.system_id_span.end = offset; + assume!(Some(Trace::Doctype(doctype_trace)), &mut self.current_trace); + doctype_trace.set_system_id_end(offset); } fn set_force_quirks(&mut self) { @@ -268,15 +284,17 @@ impl Emitter<usize> for TracingEmitter { } fn emit_current_doctype(&mut self, offset: usize) { - assume!(Some(Token::Doctype(mut doctype)), self.current_token.take()); - doctype.span.end = offset; - self.emit_token(Token::Doctype(doctype)); + assume!(Some(mut trace), self.current_trace.take()); + assume!(Trace::Doctype(doctype_trace), &mut trace); + doctype_trace.span.end = offset; + let token = self.current_token.take().unwrap(); + self.emit_token(token, trace); } } impl TracingEmitter { - fn emit_token(&mut self, token: Token) { - self.emitted_tokens.push_front(token); + fn emit_token(&mut self, token: Token, trace: Trace) { + self.emitted_tokens.push_front((token, trace)); } fn flush_current_attribute(&mut self) { @@ -284,21 +302,26 @@ impl TracingEmitter { return; } let name = std::mem::take(&mut self.current_attribute_name); - let attr_internal = std::mem::take(&mut self.current_attr_internal); + let mut attr_internal = std::mem::take(&mut self.current_attr_internal); + let attr_trace = + std::mem::replace(&mut self.current_attribute_trace, AttributeTrace::new()); match &mut self.current_token { Some(Token::StartTag(tag)) => match tag.attributes.inner.entry(name) { Entry::Vacant(vacant) => { + know!(Some(Trace::StartTag(trace)), &mut self.current_trace); + let trace_idx = trace.attribute_traces.insert(attr_trace); + attr_internal.trace_idx = Some(trace_idx); vacant.insert(attr_internal); } Entry::Occupied(_) => { - self.report_error(Error::DuplicateAttribute, attr_internal.name_span); + self.report_error(Error::DuplicateAttribute, attr_trace.name_span); } }, Some(Token::EndTag(_)) => { - self.attr_in_end_tag_span = Some(attr_internal.name_span.clone()); + self.attr_in_end_tag_span = Some(attr_trace.name_span.clone()); if !self.seen_attributes.insert(name) { - self.report_error(Error::DuplicateAttribute, attr_internal.name_span); + self.report_error(Error::DuplicateAttribute, attr_trace.name_span); } } other => debug_assert!(false, "unexpected current_token: {other:?}"), @@ -306,6 +329,12 @@ impl TracingEmitter { } } +impl From<(Token, Trace)> for Token { + fn from((token, _): (Token, Trace)) -> Self { + token + } +} + /// The majority of our testing of the [`TracingEmitter`] is done against the /// html5lib-tests in the html5lib integration test. This module only tests /// details that aren't present in the html5lib test data. @@ -313,8 +342,8 @@ impl TracingEmitter { mod tests { use super::TracingEmitter; use crate::offset::PosTrackingReader; - use crate::token::{AttrValueSyntax, Token}; - use crate::{Event, Tokenizer}; + use crate::trace::{AttrValueSyntax, Trace}; + use crate::{Event, Token, Tokenizer}; #[test] fn test_attribute_value_syntax() { @@ -325,7 +354,9 @@ mod tests { TracingEmitter::default(), ) .flatten(); - let Event::Token(Token::StartTag(tag)) = tokenizer.next().unwrap() else { + let Event::Token((Token::StartTag(tag), Trace::StartTag(tag_trace))) = + tokenizer.next().unwrap() + else { panic!("expected start tag"); }; for (name, syntax) in [ @@ -334,8 +365,9 @@ mod tests { ("single-quoted", Some(AttrValueSyntax::SingleQuoted)), ("double-quoted", Some(AttrValueSyntax::DoubleQuoted)), ] { + let attr_trace_idx = tag.attributes.get(name).unwrap().trace_idx().unwrap(); assert_eq!( - tag.attributes.get(name).unwrap().value_syntax(), + tag_trace.attribute_traces[attr_trace_idx].value_syntax(), syntax, "unexpected value for attribute {name}" ); |