diff options
author | Martin Fischer <martin@push-f.com> | 2021-11-30 17:09:08 +0100 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2021-12-05 02:52:36 +0100 |
commit | 1f99ea9e16f85945e2606905ed6345519ce16e4e (patch) | |
tree | e2c689ac735f08c8d683d84be0292f1e20d7051a /src | |
parent | 91c0008023746a9ffdd01b9b87f89a2ef4ebb01e (diff) |
spans: make Emitter generic over Span
Diffstat (limited to 'src')
-rw-r--r-- | src/emitter.rs | 32 | ||||
-rw-r--r-- | src/tokenizer.rs | 2 |
2 files changed, 20 insertions, 14 deletions
```diff
diff --git a/src/emitter.rs b/src/emitter.rs
index 0a80544..2c4ba41 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -173,17 +173,17 @@ pub trait Emitter {
 
 /// The default implementation of [`crate::Emitter`], used to produce ("emit") tokens.
 #[derive(Default)]
-pub struct DefaultEmitter {
+pub struct DefaultEmitter<S> {
     current_characters: String,
-    current_token: Option<Token>,
+    current_token: Option<Token<S>>,
     last_start_tag: String,
     current_attribute: Option<(String, String)>,
     seen_attributes: BTreeSet<String>,
-    emitted_tokens: VecDeque<Token>,
+    emitted_tokens: VecDeque<Token<S>>,
 }
 
-impl DefaultEmitter {
-    fn emit_token(&mut self, token: Token) {
+impl DefaultEmitter<()> {
+    fn emit_token(&mut self, token: Token<()>) {
         self.flush_current_characters();
         self.emitted_tokens.push_front(token);
     }
@@ -226,8 +226,8 @@ impl DefaultEmitter {
     }
 }
 
-impl Emitter for DefaultEmitter {
-    type Token = Token;
+impl Emitter for DefaultEmitter<()> {
+    type Token = Token<()>;
 
     fn set_last_start_tag(&mut self, last_start_tag: Option<&str>) {
         self.last_start_tag.clear();
@@ -417,7 +417,7 @@ impl Emitter for DefaultEmitter {
 
 /// A HTML end/close tag, such as `<p>` or `<a>`.
 #[derive(Debug, Default, Eq, PartialEq)]
-pub struct StartTag {
+pub struct StartTag<S> {
     /// Whether this tag is self-closing. If it is self-closing, no following [`EndTag`] should be
     /// expected.
     pub self_closing: bool,
@@ -430,9 +430,12 @@ pub struct StartTag {
     /// Duplicate attributes are ignored after the first one as per WHATWG spec. Implement your own
     /// [`Emitter`] to tweak this behavior.
     pub attributes: BTreeMap<String, String>,
+
+    /// The source code span of the tag name.
+    pub name_span: S,
 }
 
-impl StartTag {
+impl<S> StartTag<S> {
     /// Returns the next tokenizer state according to
     /// [Parsing HTML fragments](https://html.spec.whatwg.org/multipage/parsing.html#concept-frag-parse-context).
     /// If `scripting` is set to true [`State::RawText`] is returned if this is a `<noscript>` tag,
@@ -451,9 +454,12 @@ impl StartTag {
 
 /// A HTML end/close tag, such as `</p>` or `</a>`.
 #[derive(Debug, Default, Eq, PartialEq)]
-pub struct EndTag {
+pub struct EndTag<S> {
     /// The ending tag's name, such as `"p"` or `"a"`.
     pub name: String,
+
+    /// The source code span of the tag name.
+    pub name_span: S,
 }
 
 /// A doctype. Some examples:
@@ -480,11 +486,11 @@ pub struct Doctype {
 /// The token type used by default. You can define your own token type by implementing the
 /// [`crate::Emitter`] trait and using [`crate::Tokenizer::new_with_emitter`].
 #[derive(Debug, Eq, PartialEq)]
-pub enum Token {
+pub enum Token<S> {
     /// A HTML start tag.
-    StartTag(StartTag),
+    StartTag(StartTag<S>),
     /// A HTML end tag.
-    EndTag(EndTag),
+    EndTag(EndTag<S>),
     /// A literal string.
     String(String),
     /// A HTML comment.
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index b5a2edf..377dd01 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -33,7 +33,7 @@ impl<T: Copy> Stack2<T> {
 }
 
 /// A HTML tokenizer. See crate-level docs for basic usage.
-pub struct Tokenizer<R: Reader, E: Emitter = DefaultEmitter> {
+pub struct Tokenizer<R: Reader, E: Emitter = DefaultEmitter<()>> {
     eof: bool,
     pub(crate) state: InternalState,
     pub(crate) emitter: E,
```