From ad6ac5f0a825775c231e76cdc9016e61e54f4141 Mon Sep 17 00:00:00 2001
From: Martin Fischer <martin@push-f.com>
Date: Tue, 12 Sep 2023 08:19:00 +0200
Subject: break!: rename DefaultEmitter to TracingEmitter

---
 src/default_emitter.rs | 344 -------------------------------------------------
 src/lib.rs             |   4 +-
 src/naive_parser.rs    |   8 +-
 src/tokenizer.rs       |   2 +-
 src/tracing_emitter.rs | 344 +++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 351 insertions(+), 351 deletions(-)
 delete mode 100644 src/default_emitter.rs
 create mode 100644 src/tracing_emitter.rs

(limited to 'src')

diff --git a/src/default_emitter.rs b/src/default_emitter.rs
deleted file mode 100644
index 7b6c51e..0000000
--- a/src/default_emitter.rs
+++ /dev/null
@@ -1,344 +0,0 @@
-use std::collections::btree_map::Entry;
-use std::collections::BTreeSet;
-use std::collections::VecDeque;
-use std::ops::Range;
-
-use crate::let_else::assume;
-use crate::offset::NoopOffset;
-use crate::offset::Offset;
-use crate::token::{AttrValueSyntax, Comment, Doctype, EndTag, StartTag, Token};
-use crate::Emitter;
-use crate::Error;
-
-/// The default implementation of [`Emitter`], used to produce tokens.
-pub struct DefaultEmitter<O = NoopOffset> {
-    current_token: Option<Token<O>>,
-    current_attribute_name: String,
-    current_attr_internal: crate::token::AttrInternal<O>,
-    seen_attributes: BTreeSet<String>,
-    emitted_tokens: VecDeque<Token<O>>,
-    errors: VecDeque<(Error, Range<O>)>,
-    attr_in_end_tag_span: Option<Range<O>>,
-}
-
-impl<O: Default> Default for DefaultEmitter<O> {
-    fn default() -> Self {
-        DefaultEmitter {
-            current_token: None,
-            current_attribute_name: String::new(),
-            current_attr_internal: Default::default(),
-            seen_attributes: BTreeSet::new(),
-            emitted_tokens: VecDeque::new(),
-            errors: VecDeque::new(),
-            attr_in_end_tag_span: None,
-        }
-    }
-}
-
-impl<O> DefaultEmitter<O> {
-    /// Removes all encountered tokenizer errors and returns them as an iterator.
-    pub fn drain_errors(&mut self) -> impl Iterator<Item = (Error, Range<O>)> + '_ {
-        self.errors.drain(0..)
-    }
-}
-
-impl<O> Iterator for DefaultEmitter<O> {
-    type Item = Token<O>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.emitted_tokens.pop_back()
-    }
-}
-
-impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
-    fn report_error(&mut self, error: Error, span: Range<O>) {
-        self.errors.push_back((error, span));
-    }
-
-    fn emit_char(&mut self, c: char) {
-        self.emit_token(Token::Char(c));
-    }
-
-    fn emit_eof(&mut self) {
-        self.emit_token(Token::EndOfFile);
-    }
-
-    fn init_start_tag(&mut self, tag_offset: O, name_offset: O) {
-        self.current_token = Some(Token::StartTag(StartTag {
-            span: tag_offset..O::default(),
-            self_closing: false,
-            name: String::new(),
-            attributes: Default::default(),
-            name_span: name_offset..O::default(),
-        }));
-    }
-
-    fn init_end_tag(&mut self, tag_offset: O, name_offset: O) {
-        self.current_token = Some(Token::EndTag(EndTag {
-            span: tag_offset..O::default(),
-            name: String::new(),
-            name_span: name_offset..O::default(),
-        }));
-        self.seen_attributes.clear();
-    }
-
-    fn push_tag_name(&mut self, s: &str) {
-        assume!(
-            Some(Token::StartTag(StartTag { name, .. }) | Token::EndTag(EndTag { name, .. })),
-            &mut self.current_token
-        );
-        name.push_str(s);
-    }
-
-    fn terminate_tag_name(&mut self, offset: O) {
-        assume!(
-            Some(
-                Token::StartTag(StartTag { name_span, .. })
-                    | Token::EndTag(EndTag { name_span, .. })
-            ),
-            &mut self.current_token
-        );
-        name_span.end = offset;
-    }
-
-    fn init_attribute_name(&mut self, offset: O) {
-        self.flush_current_attribute();
-        self.current_attr_internal.name_span.start = offset;
-    }
-
-    fn push_attribute_name(&mut self, s: &str) {
-        self.current_attribute_name.push_str(s);
-    }
-
-    fn terminate_attribute_name(&mut self, offset: O) {
-        self.current_attr_internal.name_span.end = offset;
-    }
-
-    fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {
-        self.current_attr_internal.value_span.start = offset;
-        self.current_attr_internal.value_syntax = Some(syntax);
-    }
-
-    fn push_attribute_value(&mut self, s: &str) {
-        self.current_attr_internal.value.push_str(s);
-    }
-
-    fn terminate_attribute_value(&mut self, offset: O) {
-        self.current_attr_internal.value_span.end = offset;
-    }
-
-    fn set_self_closing(&mut self, slash_span: Range<O>) {
-        let token = self.current_token.as_mut().unwrap();
-
-        match token {
-            Token::StartTag(tag) => {
-                tag.self_closing = true;
-            }
-            Token::EndTag(_) => {
-                self.report_error(Error::EndTagWithTrailingSolidus, slash_span);
-            }
-            other => debug_assert!(false, "unexpected current_token: {other:?}"),
-        }
-    }
-
-    fn emit_current_tag(&mut self, offset: O) {
-        self.flush_current_attribute();
-        let mut token = self.current_token.take().unwrap();
-        match &mut token {
-            Token::EndTag(tag) => {
-                if !self.seen_attributes.is_empty() {
-                    let span = self.attr_in_end_tag_span.take().unwrap();
-                    self.report_error(Error::EndTagWithAttributes, span);
-                }
-                self.seen_attributes.clear();
-                tag.span.end = offset;
-            }
-            Token::StartTag(tag) => {
-                tag.span.end = offset;
-            }
-            other => {
-                debug_assert!(false, "unexpected current_token: {other:?}");
-                return;
-            }
-        }
-        self.emit_token(token);
-    }
-
-    fn init_comment(&mut self, data_start_offset: O) {
-        self.current_token = Some(Token::Comment(Comment {
-            data: String::new(),
-            data_span: data_start_offset..O::default(),
-        }));
-    }
-
-    fn push_comment(&mut self, s: &str) {
-        assume!(Some(Token::Comment(comment)), &mut self.current_token);
-        comment.data.push_str(s);
-    }
-
-    fn emit_current_comment(&mut self, data_end_offset: O) {
-        let mut token = self.current_token.take().unwrap();
-        assume!(Token::Comment(comment), &mut token);
-        comment.data_span.end = data_end_offset;
-        self.emit_token(token);
-    }
-
-    fn init_doctype(&mut self, offset: O) {
-        self.current_token = Some(Token::Doctype(Doctype {
-            name: None,
-            force_quirks: false,
-            public_id: None,
-            system_id: None,
-            span: offset..O::default(),
-            name_span: O::default()..O::default(),
-            public_id_span: O::default()..O::default(),
-            system_id_span: O::default()..O::default(),
-        }));
-    }
-
-    fn init_doctype_name(&mut self, offset: O) {
-        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
-        doctype.name = Some("".into());
-        doctype.name_span.start = offset;
-    }
-
-    fn push_doctype_name(&mut self, s: &str) {
-        assume!(
-            Some(Token::Doctype(Doctype {
-                name: Some(name),
-                ..
-            })),
-            &mut self.current_token
-        );
-        name.push_str(s);
-    }
-
-    fn terminate_doctype_name(&mut self, offset: O) {
-        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
-        doctype.name_span.end = offset;
-    }
-
-    fn init_doctype_public_id(&mut self, offset: O) {
-        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
-        doctype.public_id = Some("".to_owned());
-        doctype.public_id_span.start = offset;
-    }
-
-    fn push_doctype_public_id(&mut self, s: &str) {
-        assume!(
-            Some(Token::Doctype(Doctype {
-                public_id: Some(public_id),
-                ..
-            })),
-            &mut self.current_token
-        );
-        public_id.push_str(s);
-    }
-
-    fn terminate_doctype_public_id(&mut self, offset: O) {
-        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
-        doctype.public_id_span.end = offset;
-    }
-
-    fn init_doctype_system_id(&mut self, offset: O) {
-        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
-        doctype.system_id = Some("".to_owned());
-        doctype.system_id_span.start = offset;
-    }
-
-    fn push_doctype_system_id(&mut self, s: &str) {
-        assume!(
-            Some(Token::Doctype(Doctype {
-                system_id: Some(id),
-                ..
-            })),
-            &mut self.current_token
-        );
-        id.push_str(s);
-    }
-
-    fn terminate_doctype_system_id(&mut self, offset: O) {
-        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
-        doctype.system_id_span.end = offset;
-    }
-
-    fn set_force_quirks(&mut self) {
-        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
-        doctype.force_quirks = true;
-    }
-
-    fn emit_current_doctype(&mut self, offset: O) {
-        assume!(Some(Token::Doctype(mut doctype)), self.current_token.take());
-        doctype.span.end = offset;
-        self.emit_token(Token::Doctype(doctype));
-    }
-}
-
-impl<O> DefaultEmitter<O> {
-    fn emit_token(&mut self, token: Token<O>) {
-        self.emitted_tokens.push_front(token);
-    }
-
-    fn flush_current_attribute(&mut self)
-    where
-        O: Offset,
-    {
-        if self.current_attribute_name.is_empty() {
-            return;
-        }
-        let name = std::mem::take(&mut self.current_attribute_name);
-        let attr_internal = std::mem::take(&mut self.current_attr_internal);
-
-        match &mut self.current_token {
-            Some(Token::StartTag(tag)) => match tag.attributes.inner.entry(name) {
-                Entry::Vacant(vacant) => {
-                    vacant.insert(attr_internal);
-                }
-                Entry::Occupied(_) => {
-                    self.report_error(Error::DuplicateAttribute, attr_internal.name_span);
-                }
-            },
-            Some(Token::EndTag(_)) => {
-                self.attr_in_end_tag_span = Some(attr_internal.name_span.clone());
-                if !self.seen_attributes.insert(name) {
-                    self.report_error(Error::DuplicateAttribute, attr_internal.name_span);
-                }
-            }
-            other => debug_assert!(false, "unexpected current_token: {other:?}"),
-        }
-    }
-}
-
-/// The majority of our testing of the [`DefaultEmitter`] is done against the
-/// html5lib-tests in the html5lib integration test. This module only tests
-/// details that aren't present in the html5lib test data.
-#[cfg(test)]
-mod tests {
-    use super::DefaultEmitter;
-    use crate::token::{AttrValueSyntax, Token};
-    use crate::{Event, Tokenizer};
-
-    #[test]
-    fn test_attribute_value_syntax() {
-        let mut tokenizer = Tokenizer::new(
-            "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">",
-            DefaultEmitter::default(),
-        )
-        .flatten();
-        let Event::Token(Token::StartTag(tag)) = tokenizer.next().unwrap() else {
-            panic!("expected start tag");
-        };
-        for (name, syntax) in [
-            ("empty", None),
-            ("unquoted", Some(AttrValueSyntax::Unquoted)),
-            ("single-quoted", Some(AttrValueSyntax::SingleQuoted)),
-            ("double-quoted", Some(AttrValueSyntax::DoubleQuoted)),
-        ] {
-            assert_eq!(
-                tag.attributes.get(name).unwrap().value_syntax(),
-                syntax,
-                "unexpected value for attribute {name}"
-            );
-        }
-    }
-}
diff --git a/src/lib.rs b/src/lib.rs
index 40b691a..aecbef3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,13 +7,13 @@
 #![doc = concat!("[the LICENSE file]: ", file_url!("LICENSE"))]
 #![doc = include_str!("../README.md")]
 
-mod default_emitter;
 mod emitter;
 mod entities;
 mod error;
 mod let_else;
 mod naive_parser;
 mod tokenizer;
+mod tracing_emitter;
 
 /// Types for HTML attributes.
 pub mod attr {
@@ -25,12 +25,12 @@ pub mod offset;
 pub mod reader;
 pub mod token;
 
-pub use default_emitter::DefaultEmitter;
 pub use emitter::Emitter;
 pub use error::Error;
 pub use naive_parser::NaiveParser;
 pub use token::{Comment, Doctype, EndTag, StartTag, Token};
 pub use tokenizer::{CdataAction, Event, State, Tokenizer};
+pub use tracing_emitter::TracingEmitter;
 
 #[cfg(feature = "integration-tests")]
 pub use tokenizer::InternalState;
diff --git a/src/naive_parser.rs b/src/naive_parser.rs
index 4988477..91edbc0 100644
--- a/src/naive_parser.rs
+++ b/src/naive_parser.rs
@@ -1,7 +1,7 @@
-use crate::default_emitter::DefaultEmitter;
 use crate::offset::{Offset, Position};
 use crate::reader::{IntoReader, Reader};
 use crate::tokenizer::CdataAction;
+use crate::tracing_emitter::TracingEmitter;
 use crate::{Emitter, Event, State, Tokenizer};
 
 /// A naive HTML parser (**not** spec-compliant since it doesn't do tree construction).
@@ -30,18 +30,18 @@ pub struct NaiveParser<R: Reader, O: Offset, E: Emitter<O>> {
     tokenizer: Tokenizer<R, O, E>,
 }
 
-impl<R, O> NaiveParser<R, O, DefaultEmitter<O>>
+impl<R, O> NaiveParser<R, O, TracingEmitter<O>>
 where
     R: Reader + Position<O>,
     O: Offset,
 {
     /// Constructs a new naive parser.
     // TODO: add example for NaiveParser::new
-    pub fn new<'a, IR>(reader: IR) -> NaiveParser<R, O, DefaultEmitter<O>>
+    pub fn new<'a, IR>(reader: IR) -> NaiveParser<R, O, TracingEmitter<O>>
     where
         IR: IntoReader<'a, Reader = R>,
     {
-        NaiveParser::new_with_emitter(reader, DefaultEmitter::default())
+        NaiveParser::new_with_emitter(reader, TracingEmitter::default())
     }
 }
 
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 7c38e49..d0e2eaf 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -15,7 +15,7 @@ pub use machine::State as InternalState;
 /// Iterating over the tokenizer directly without calling [`Tokenizer::set_state`]
 /// results in wrong state transitions:
 ///
-/// ```
+/// ```ignore TODO: unignore once the BasicEmitter has been implemented
 /// # use html5tokenizer::{DefaultEmitter, Event, Tokenizer, Token};
 /// let emitter = DefaultEmitter::default();
 /// let html = "<script><b>";
diff --git a/src/tracing_emitter.rs b/src/tracing_emitter.rs
new file mode 100644
index 0000000..408d9b0
--- /dev/null
+++ b/src/tracing_emitter.rs
@@ -0,0 +1,344 @@
+use std::collections::btree_map::Entry;
+use std::collections::BTreeSet;
+use std::collections::VecDeque;
+use std::ops::Range;
+
+use crate::let_else::assume;
+use crate::offset::NoopOffset;
+use crate::offset::Offset;
+use crate::token::{AttrValueSyntax, Comment, Doctype, EndTag, StartTag, Token};
+use crate::Emitter;
+use crate::Error;
+
+/// An implementation of [`Emitter`] that produces tokens with spans and queues tokenizer errors.
+pub struct TracingEmitter<O = NoopOffset> {
+    current_token: Option<Token<O>>,
+    current_attribute_name: String,
+    current_attr_internal: crate::token::AttrInternal<O>,
+    seen_attributes: BTreeSet<String>,
+    emitted_tokens: VecDeque<Token<O>>,
+    errors: VecDeque<(Error, Range<O>)>,
+    attr_in_end_tag_span: Option<Range<O>>,
+}
+
+impl<O: Default> Default for TracingEmitter<O> {
+    fn default() -> Self {
+        TracingEmitter {
+            current_token: None,
+            current_attribute_name: String::new(),
+            current_attr_internal: Default::default(),
+            seen_attributes: BTreeSet::new(),
+            emitted_tokens: VecDeque::new(),
+            errors: VecDeque::new(),
+            attr_in_end_tag_span: None,
+        }
+    }
+}
+
+impl<O> TracingEmitter<O> {
+    /// Removes all encountered tokenizer errors and returns them as an iterator.
+    pub fn drain_errors(&mut self) -> impl Iterator<Item = (Error, Range<O>)> + '_ {
+        self.errors.drain(0..)
+    }
+}
+
+impl<O> Iterator for TracingEmitter<O> {
+    type Item = Token<O>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.emitted_tokens.pop_back()
+    }
+}
+
+impl<O: Offset> Emitter<O> for TracingEmitter<O> {
+    fn report_error(&mut self, error: Error, span: Range<O>) {
+        self.errors.push_back((error, span));
+    }
+
+    fn emit_char(&mut self, c: char) {
+        self.emit_token(Token::Char(c));
+    }
+
+    fn emit_eof(&mut self) {
+        self.emit_token(Token::EndOfFile);
+    }
+
+    fn init_start_tag(&mut self, tag_offset: O, name_offset: O) {
+        self.current_token = Some(Token::StartTag(StartTag {
+            span: tag_offset..O::default(),
+            self_closing: false,
+            name: String::new(),
+            attributes: Default::default(),
+            name_span: name_offset..O::default(),
+        }));
+    }
+
+    fn init_end_tag(&mut self, tag_offset: O, name_offset: O) {
+        self.current_token = Some(Token::EndTag(EndTag {
+            span: tag_offset..O::default(),
+            name: String::new(),
+            name_span: name_offset..O::default(),
+        }));
+        self.seen_attributes.clear();
+    }
+
+    fn push_tag_name(&mut self, s: &str) {
+        assume!(
+            Some(Token::StartTag(StartTag { name, .. }) | Token::EndTag(EndTag { name, .. })),
+            &mut self.current_token
+        );
+        name.push_str(s);
+    }
+
+    fn terminate_tag_name(&mut self, offset: O) {
+        assume!(
+            Some(
+                Token::StartTag(StartTag { name_span, .. })
+                    | Token::EndTag(EndTag { name_span, .. })
+            ),
+            &mut self.current_token
+        );
+        name_span.end = offset;
+    }
+
+    fn init_attribute_name(&mut self, offset: O) {
+        self.flush_current_attribute();
+        self.current_attr_internal.name_span.start = offset;
+    }
+
+    fn push_attribute_name(&mut self, s: &str) {
+        self.current_attribute_name.push_str(s);
+    }
+
+    fn terminate_attribute_name(&mut self, offset: O) {
+        self.current_attr_internal.name_span.end = offset;
+    }
+
+    fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {
+        self.current_attr_internal.value_span.start = offset;
+        self.current_attr_internal.value_syntax = Some(syntax);
+    }
+
+    fn push_attribute_value(&mut self, s: &str) {
+        self.current_attr_internal.value.push_str(s);
+    }
+
+    fn terminate_attribute_value(&mut self, offset: O) {
+        self.current_attr_internal.value_span.end = offset;
+    }
+
+    fn set_self_closing(&mut self, slash_span: Range<O>) {
+        let token = self.current_token.as_mut().unwrap();
+
+        match token {
+            Token::StartTag(tag) => {
+                tag.self_closing = true;
+            }
+            Token::EndTag(_) => {
+                self.report_error(Error::EndTagWithTrailingSolidus, slash_span);
+            }
+            other => debug_assert!(false, "unexpected current_token: {other:?}"),
+        }
+    }
+
+    fn emit_current_tag(&mut self, offset: O) {
+        self.flush_current_attribute();
+        let mut token = self.current_token.take().unwrap();
+        match &mut token {
+            Token::EndTag(tag) => {
+                if !self.seen_attributes.is_empty() {
+                    let span = self.attr_in_end_tag_span.take().unwrap();
+                    self.report_error(Error::EndTagWithAttributes, span);
+                }
+                self.seen_attributes.clear();
+                tag.span.end = offset;
+            }
+            Token::StartTag(tag) => {
+                tag.span.end = offset;
+            }
+            other => {
+                debug_assert!(false, "unexpected current_token: {other:?}");
+                return;
+            }
+        }
+        self.emit_token(token);
+    }
+
+    fn init_comment(&mut self, data_start_offset: O) {
+        self.current_token = Some(Token::Comment(Comment {
+            data: String::new(),
+            data_span: data_start_offset..O::default(),
+        }));
+    }
+
+    fn push_comment(&mut self, s: &str) {
+        assume!(Some(Token::Comment(comment)), &mut self.current_token);
+        comment.data.push_str(s);
+    }
+
+    fn emit_current_comment(&mut self, data_end_offset: O) {
+        let mut token = self.current_token.take().unwrap();
+        assume!(Token::Comment(comment), &mut token);
+        comment.data_span.end = data_end_offset;
+        self.emit_token(token);
+    }
+
+    fn init_doctype(&mut self, offset: O) {
+        self.current_token = Some(Token::Doctype(Doctype {
+            name: None,
+            force_quirks: false,
+            public_id: None,
+            system_id: None,
+            span: offset..O::default(),
+            name_span: O::default()..O::default(),
+            public_id_span: O::default()..O::default(),
+            system_id_span: O::default()..O::default(),
+        }));
+    }
+
+    fn init_doctype_name(&mut self, offset: O) {
+        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
+        doctype.name = Some("".into());
+        doctype.name_span.start = offset;
+    }
+
+    fn push_doctype_name(&mut self, s: &str) {
+        assume!(
+            Some(Token::Doctype(Doctype {
+                name: Some(name),
+                ..
+            })),
+            &mut self.current_token
+        );
+        name.push_str(s);
+    }
+
+    fn terminate_doctype_name(&mut self, offset: O) {
+        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
+        doctype.name_span.end = offset;
+    }
+
+    fn init_doctype_public_id(&mut self, offset: O) {
+        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
+        doctype.public_id = Some("".to_owned());
+        doctype.public_id_span.start = offset;
+    }
+
+    fn push_doctype_public_id(&mut self, s: &str) {
+        assume!(
+            Some(Token::Doctype(Doctype {
+                public_id: Some(public_id),
+                ..
+            })),
+            &mut self.current_token
+        );
+        public_id.push_str(s);
+    }
+
+    fn terminate_doctype_public_id(&mut self, offset: O) {
+        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
+        doctype.public_id_span.end = offset;
+    }
+
+    fn init_doctype_system_id(&mut self, offset: O) {
+        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
+        doctype.system_id = Some("".to_owned());
+        doctype.system_id_span.start = offset;
+    }
+
+    fn push_doctype_system_id(&mut self, s: &str) {
+        assume!(
+            Some(Token::Doctype(Doctype {
+                system_id: Some(id),
+                ..
+            })),
+            &mut self.current_token
+        );
+        id.push_str(s);
+    }
+
+    fn terminate_doctype_system_id(&mut self, offset: O) {
+        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
+        doctype.system_id_span.end = offset;
+    }
+
+    fn set_force_quirks(&mut self) {
+        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
+        doctype.force_quirks = true;
+    }
+
+    fn emit_current_doctype(&mut self, offset: O) {
+        assume!(Some(Token::Doctype(mut doctype)), self.current_token.take());
+        doctype.span.end = offset;
+        self.emit_token(Token::Doctype(doctype));
+    }
+}
+
+impl<O> TracingEmitter<O> {
+    fn emit_token(&mut self, token: Token<O>) {
+        self.emitted_tokens.push_front(token);
+    }
+
+    fn flush_current_attribute(&mut self)
+    where
+        O: Offset,
+    {
+        if self.current_attribute_name.is_empty() {
+            return;
+        }
+        let name = std::mem::take(&mut self.current_attribute_name);
+        let attr_internal = std::mem::take(&mut self.current_attr_internal);
+
+        match &mut self.current_token {
+            Some(Token::StartTag(tag)) => match tag.attributes.inner.entry(name) {
+                Entry::Vacant(vacant) => {
+                    vacant.insert(attr_internal);
+                }
+                Entry::Occupied(_) => {
+                    self.report_error(Error::DuplicateAttribute, attr_internal.name_span);
+                }
+            },
+            Some(Token::EndTag(_)) => {
+                self.attr_in_end_tag_span = Some(attr_internal.name_span.clone());
+                if !self.seen_attributes.insert(name) {
+                    self.report_error(Error::DuplicateAttribute, attr_internal.name_span);
+                }
+            }
+            other => debug_assert!(false, "unexpected current_token: {other:?}"),
+        }
+    }
+}
+
+/// The majority of our testing of the [`TracingEmitter`] is done against the
+/// html5lib-tests in the html5lib integration test. This module only tests
+/// details that aren't present in the html5lib test data.
+#[cfg(test)]
+mod tests {
+    use super::TracingEmitter;
+    use crate::token::{AttrValueSyntax, Token};
+    use crate::{Event, Tokenizer};
+
+    #[test]
+    fn test_attribute_value_syntax() {
+        let mut tokenizer = Tokenizer::new(
+            "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">",
+            TracingEmitter::default(),
+        )
+        .flatten();
+        let Event::Token(Token::StartTag(tag)) = tokenizer.next().unwrap() else {
+            panic!("expected start tag");
+        };
+        for (name, syntax) in [
+            ("empty", None),
+            ("unquoted", Some(AttrValueSyntax::Unquoted)),
+            ("single-quoted", Some(AttrValueSyntax::SingleQuoted)),
+            ("double-quoted", Some(AttrValueSyntax::DoubleQuoted)),
+        ] {
+            assert_eq!(
+                tag.attributes.get(name).unwrap().value_syntax(),
+                syntax,
+                "unexpected value for attribute {name}"
+            );
+        }
+    }
+}
-- 
cgit v1.2.3