aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Fischer <martin@push-f.com>2021-11-30 17:09:08 +0100
committerMartin Fischer <martin@push-f.com>2021-12-05 02:52:36 +0100
commit1f99ea9e16f85945e2606905ed6345519ce16e4e (patch)
treee2c689ac735f08c8d683d84be0292f1e20d7051a
parent91c0008023746a9ffdd01b9b87f89a2ef4ebb01e (diff)
spans: make Emitter generic over Span
-rw-r--r--src/emitter.rs32
-rw-r--r--src/tokenizer.rs2
-rw-r--r--tests/test_html5lib.rs11
3 files changed, 28 insertions, 17 deletions
diff --git a/src/emitter.rs b/src/emitter.rs
index 0a80544..2c4ba41 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -173,17 +173,17 @@ pub trait Emitter {
/// The default implementation of [`crate::Emitter`], used to produce ("emit") tokens.
#[derive(Default)]
-pub struct DefaultEmitter {
+pub struct DefaultEmitter<S> {
current_characters: String,
- current_token: Option<Token>,
+ current_token: Option<Token<S>>,
last_start_tag: String,
current_attribute: Option<(String, String)>,
seen_attributes: BTreeSet<String>,
- emitted_tokens: VecDeque<Token>,
+ emitted_tokens: VecDeque<Token<S>>,
}
-impl DefaultEmitter {
- fn emit_token(&mut self, token: Token) {
+impl DefaultEmitter<()> {
+ fn emit_token(&mut self, token: Token<()>) {
self.flush_current_characters();
self.emitted_tokens.push_front(token);
}
@@ -226,8 +226,8 @@ impl DefaultEmitter {
}
}
-impl Emitter for DefaultEmitter {
- type Token = Token;
+impl Emitter for DefaultEmitter<()> {
+ type Token = Token<()>;
fn set_last_start_tag(&mut self, last_start_tag: Option<&str>) {
self.last_start_tag.clear();
@@ -417,7 +417,7 @@ impl Emitter for DefaultEmitter {
/// A HTML start/open tag, such as `<p>` or `<a>`.
#[derive(Debug, Default, Eq, PartialEq)]
-pub struct StartTag {
+pub struct StartTag<S> {
/// Whether this tag is self-closing. If it is self-closing, no following [`EndTag`] should be
/// expected.
pub self_closing: bool,
@@ -430,9 +430,12 @@ pub struct StartTag {
/// Duplicate attributes are ignored after the first one as per WHATWG spec. Implement your own
/// [`Emitter`] to tweak this behavior.
pub attributes: BTreeMap<String, String>,
+
+ /// The source code span of the tag name.
+ pub name_span: S,
}
-impl StartTag {
+impl<S> StartTag<S> {
/// Returns the next tokenizer state according to
/// [Parsing HTML fragments](https://html.spec.whatwg.org/multipage/parsing.html#concept-frag-parse-context).
/// If `scripting` is set to true [`State::RawText`] is returned if this is a `<noscript>` tag,
@@ -451,9 +454,12 @@ impl StartTag {
/// A HTML end/close tag, such as `</p>` or `</a>`.
#[derive(Debug, Default, Eq, PartialEq)]
-pub struct EndTag {
+pub struct EndTag<S> {
/// The ending tag's name, such as `"p"` or `"a"`.
pub name: String,
+
+ /// The source code span of the tag name.
+ pub name_span: S,
}
/// A doctype. Some examples:
@@ -480,11 +486,11 @@ pub struct Doctype {
/// The token type used by default. You can define your own token type by implementing the
/// [`crate::Emitter`] trait and using [`crate::Tokenizer::new_with_emitter`].
#[derive(Debug, Eq, PartialEq)]
-pub enum Token {
+pub enum Token<S> {
/// A HTML start tag.
- StartTag(StartTag),
+ StartTag(StartTag<S>),
/// A HTML end tag.
- EndTag(EndTag),
+ EndTag(EndTag<S>),
/// A literal string.
String(String),
/// A HTML comment.
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index b5a2edf..377dd01 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -33,7 +33,7 @@ impl<T: Copy> Stack2<T> {
}
/// A HTML tokenizer. See crate-level docs for basic usage.
-pub struct Tokenizer<R: Reader, E: Emitter = DefaultEmitter> {
+pub struct Tokenizer<R: Reader, E: Emitter = DefaultEmitter<()>> {
eof: bool,
pub(crate) state: InternalState,
pub(crate) emitter: E,
diff --git a/tests/test_html5lib.rs b/tests/test_html5lib.rs
index cb11a00..5668217 100644
--- a/tests/test_html5lib.rs
+++ b/tests/test_html5lib.rs
@@ -10,7 +10,7 @@ compile_error!(
"integration tests need the integration-tests feature enabled. Run cargo test --all-features"
);
-struct ExpectedOutputTokens(Vec<Token>);
+struct ExpectedOutputTokens(Vec<Token<()>>);
impl<'de> Deserialize<'de> for ExpectedOutputTokens {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
@@ -80,19 +80,24 @@ impl<'de> Deserialize<'de> for ExpectedOutputTokens {
self_closing: false,
name,
attributes,
+ name_span: (),
}),
OutputToken::StartTag2(_, name, attributes, self_closing) => {
Token::StartTag(StartTag {
self_closing,
name,
attributes,
+ name_span: (),
})
}
- OutputToken::EndTag(_, name) => Token::EndTag(EndTag { name }),
+ OutputToken::EndTag(_, name) => Token::EndTag(EndTag {
+ name,
+ name_span: (),
+ }),
OutputToken::Comment(_, data) => Token::Comment(data),
OutputToken::Character(_, data) => Token::String(data),
})
- .collect::<Vec<Token>>(),
+ .collect::<Vec<Token<()>>>(),
))
}
}