about | summary | refs | log | tree | commit | diff
path: root/src/emitter.rs
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2023-09-09 19:10:49 +0200
committer Martin Fischer <martin@push-f.com> 2023-09-09 23:02:47 +0200
commit 1d8e6239875c810197a0679a20412726afb8ff66 (patch)
tree 3daa917d720235117770040405a78dad548032e9 /src/emitter.rs
parent 77eb3eefeeab3ba7ab3c6387e5916192b95b482d (diff)
refactor: merge token types with attr to new token module
Diffstat (limited to 'src/emitter.rs')
-rw-r--r-- src/emitter.rs 145
1 files changed, 6 insertions, 139 deletions
diff --git a/src/emitter.rs b/src/emitter.rs
index f27c778..23f9ede 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -4,9 +4,9 @@ use std::collections::VecDeque;
use std::mem;
use std::ops::Range;
-use crate::attr::AttrValueSyntax;
use crate::offset::NoopOffset;
use crate::offset::Offset;
+use crate::token::{AttrValueSyntax, Comment, Doctype, EndTag, StartTag, Token};
use crate::Error;
/// An emitter is an object providing methods to the tokenizer to produce ("emit") tokens.
@@ -214,7 +214,7 @@ pub trait Emitter<O> {
pub struct DefaultEmitter<O = NoopOffset> {
current_characters: String,
current_token: Option<Token<O>>,
- current_attribute: Option<(String, crate::attr::AttrInternal<O>)>,
+ current_attribute: Option<(String, crate::token::AttrInternal<O>)>,
seen_attributes: BTreeSet<String>,
emitted_tokens: VecDeque<Token<O>>,
attr_in_end_tag_span: Option<Range<O>>,
@@ -465,7 +465,7 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
self.flush_current_attribute();
self.current_attribute = Some((
String::new(),
- crate::attr::AttrInternal {
+ crate::token::AttrInternal {
name_span: offset..O::default(),
value: String::new(),
value_span: O::default()..O::default(),
@@ -564,147 +564,14 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
}
}
-/// An HTML start tag, such as `<p>` or `<a>`.
-#[derive(Debug, Eq, PartialEq)]
-pub struct StartTag<O> {
- /// Whether this tag is self-closing. If it is self-closing, no following [`EndTag`] should be
- /// expected.
- pub self_closing: bool,
-
- /// The tag name.
- /// Uppercase ASCII characters (A-Z) have been converted to lowercase.
- pub name: String,
-
- /// A mapping for any HTML attributes this start tag may have.
- ///
- /// Duplicate attributes are ignored after the first one as per WHATWG spec.
- pub attributes: crate::attr::AttributeMap<O>,
-
- /// The source code span of the tag.
- pub span: Range<O>,
-
- /// The span of the tag name.
- pub name_span: Range<O>,
-}
-
-/// An HTML end/close tag, such as `</p>` or `</a>`.
-#[derive(Debug, Eq, PartialEq)]
-pub struct EndTag<O> {
- /// The tag name.
- /// Uppercase ASCII characters (A-Z) have been converted to lowercase.
- pub name: String,
-
- /// The source code span of the tag.
- pub span: Range<O>,
-
- /// The span of the tag name.
- pub name_span: Range<O>,
-}
-
-/// An HTML comment.
-#[derive(PartialEq, Eq, Debug)]
-pub struct Comment<O> {
- /// The text within the comment.
- pub data: String,
- /// The source offset of the comment data.
- pub data_span: Range<O>,
-}
-
-impl<O: Offset> Comment<O> {
- /// Returns the span for the comment data.
- pub fn data_span(&self) -> Range<O> {
- self.data_span.clone()
- }
-}
-
-/// A doctype. Some examples:
-///
-/// * `<!DOCTYPE {name}>`
-/// * `<!DOCTYPE {name} PUBLIC '{public_id}'>`
-/// * `<!DOCTYPE {name} SYSTEM '{system_id}'>`
-/// * `<!DOCTYPE {name} PUBLIC '{public_id}' '{system_id}'>`
-#[derive(Debug, Eq, PartialEq)]
-pub struct Doctype<O> {
- /// The [force-quirks flag].
- ///
- /// [force-quirks flag]: https://html.spec.whatwg.org/multipage/parsing.html#force-quirks-flag
- pub force_quirks: bool,
-
- /// The doctype's name. Uppercase ASCII characters (A-Z) have been
- /// converted to lowercase. For HTML documents this should be "html".
- pub name: Option<String>,
-
- /// The doctype's public identifier.
- pub public_id: Option<String>,
-
- /// The doctype's system identifier.
- pub system_id: Option<String>,
-
- /// The source code span of the doctype.
- pub span: Range<O>,
-
- /// The span of the name.
- name_span: Range<O>,
-
- /// The span of the public identifier.
- public_id_span: Range<O>,
-
- /// The span of the system identifier.
- system_id_span: Range<O>,
-}
-
-impl<O: Offset> Doctype<O> {
- /// Returns the span of the name.
- pub fn name_span(&self) -> Option<Range<O>> {
- self.name.as_ref()?;
- Some(self.name_span.clone())
- }
-
- /// Returns the span of the public identifier.
- pub fn public_id_span(&self) -> Option<Range<O>> {
- self.public_id.as_ref()?;
- Some(self.public_id_span.clone())
- }
-
- /// Returns the span of the system identifier.
- pub fn system_id_span(&self) -> Option<Range<O>> {
- self.system_id.as_ref()?;
- Some(self.system_id_span.clone())
- }
-}
-
-/// A type for the tokens emitted by a WHATWG-compliant HTML tokenizer.
-#[derive(Debug, Eq, PartialEq)]
-pub enum Token<O> {
- /// An HTML start tag.
- StartTag(StartTag<O>),
- /// An HTML end tag.
- EndTag(EndTag<O>),
- /// A literal string. Character references have been resolved.
- String(String),
- /// An HTML comment.
- Comment(Comment<O>),
- /// An HTML doctype declaration.
- Doctype(Doctype<O>),
- /// An HTML parsing error.
- ///
- /// Can be skipped over, the tokenizer is supposed to recover from the error and continues with
- /// more tokens afterward.
- Error {
- /// What kind of error occurred.
- error: Error,
- /// The source code span of the error.
- span: Range<O>,
- },
-}
-
/// The majority of our testing of the [`DefaultEmitter`] is done against the
/// html5lib-tests in the html5lib integration test. This module only tests
/// details that aren't present in the html5lib test data.
#[cfg(test)]
mod tests {
- use super::{DefaultEmitter, Token};
- use crate::{attr::AttrValueSyntax, Event, Tokenizer};
+ use super::DefaultEmitter;
+ use crate::token::{AttrValueSyntax, Token};
+ use crate::{Event, Tokenizer};
#[test]
fn test_attribute_value_syntax() {