summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2023-09-10 06:22:38 +0200
committer Martin Fischer <martin@push-f.com> 2023-09-11 09:05:44 +0200
commit 0695ee9ca63cc3faa51378b35381d62f5a00b846 (patch)
tree 5d7483bdf96f542a5647988f5e09b5e4f8167832 /src
parent 1d8e6239875c810197a0679a20412726afb8ff66 (diff)
chore: move DefaultEmitter to own module
Diffstat (limited to 'src')
-rw-r--r-- src/default_emitter.rs 427
-rw-r--r-- src/emitter.rs 424
-rw-r--r-- src/lib.rs 4
-rw-r--r-- src/naive_parser.rs 2
4 files changed, 432 insertions, 425 deletions
diff --git a/src/default_emitter.rs b/src/default_emitter.rs
new file mode 100644
index 0000000..c957b20
--- /dev/null
+++ b/src/default_emitter.rs
@@ -0,0 +1,427 @@
+use std::collections::btree_map::Entry;
+use std::collections::BTreeSet;
+use std::collections::VecDeque;
+use std::mem;
+use std::ops::Range;
+
+use crate::offset::NoopOffset;
+use crate::offset::Offset;
+use crate::token::{AttrValueSyntax, Comment, Doctype, EndTag, StartTag, Token};
+use crate::Emitter;
+use crate::Error;
+
+/// The default implementation of [`Emitter`], used to produce tokens.
+///
+/// # Warning
+///
+/// * Using the DefaultEmitter without calling [`Tokenizer::set_state`]
+/// results in wrong state transitions:
+///
+/// ```
+/// # use html5tokenizer::{DefaultEmitter, Event, Tokenizer, Token};
+/// let emitter = DefaultEmitter::default();
+/// let html = "<script><b>";
+/// let mut tokens = Tokenizer::new(html, emitter).flatten();
+/// assert!(matches!(tokens.next().unwrap(), Event::Token(Token::StartTag(tag)) if tag.name == "script"));
+/// assert!(matches!(tokens.next().unwrap(), Event::Token(Token::StartTag(tag)) if tag.name == "b"));
+/// ```
+///
+/// [`Tokenizer::set_state`]: crate::Tokenizer::set_state
+pub struct DefaultEmitter<O = NoopOffset> {
+ current_characters: String, // Text accumulated by `emit_string`; becomes a `Token::String` when flushed.
+ current_token: Option<Token<O>>, // Tag, comment or doctype under construction; taken when it is emitted.
+ current_attribute: Option<(String, crate::token::AttrInternal<O>)>, // (name, data) of the attribute under construction.
+ seen_attributes: BTreeSet<String>, // Attribute names flushed so far for the current end tag.
+ emitted_tokens: VecDeque<Token<O>>, // Finished tokens and errors: pushed at the front, popped from the back.
+ attr_in_end_tag_span: Option<Range<O>>, // Name span of the attribute most recently flushed on an end tag.
+}
+
+impl<O> Default for DefaultEmitter<O> {
+    /// Creates an emitter with empty buffers and no token under construction.
+    ///
+    /// This impl places no bound on `O`; a derived `Default` would add an
+    /// `O: Default` requirement.
+    fn default() -> Self {
+        Self {
+            // Buffers and queues start out empty.
+            current_characters: String::default(),
+            seen_attributes: BTreeSet::default(),
+            emitted_tokens: VecDeque::default(),
+            // Nothing is under construction yet.
+            current_token: None,
+            current_attribute: None,
+            attr_in_end_tag_span: None,
+        }
+    }
+}
+
+impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
+    type Token = Token<O>;
+
+    /// Flushes character data that is still buffered when the input ends.
+    fn emit_eof(&mut self) {
+        self.flush_current_characters();
+    }
+
+    /// Queues an error token for `span`.
+    fn emit_error(&mut self, error: Error, span: Range<O>) {
+        self.push_error(error, span);
+    }
+
+    /// Returns the oldest queued token: tokens are pushed at the front of the
+    /// queue and popped from the back.
+    fn pop_token(&mut self) -> Option<Self::Token> {
+        self.emitted_tokens.pop_back()
+    }
+
+    /// Buffers character data; it becomes a string token on the next flush.
+    fn emit_string(&mut self, s: &str) {
+        self.current_characters += s;
+    }
+
+    /// Begins a start tag. Both span ends are `O::default()` placeholders
+    /// until `terminate_tag_name` and `emit_current_tag` set them.
+    fn init_start_tag(&mut self, tag_offset: O, name_offset: O) {
+        let tag = StartTag {
+            name: String::new(),
+            name_span: name_offset..O::default(),
+            span: tag_offset..O::default(),
+            attributes: Default::default(),
+            self_closing: false,
+        };
+        self.current_token = Some(Token::StartTag(tag));
+    }
+
+    /// Begins an end tag and forgets attribute names recorded earlier.
+    fn init_end_tag(&mut self, tag_offset: O, name_offset: O) {
+        self.seen_attributes.clear();
+        let tag = EndTag {
+            name: String::new(),
+            name_span: name_offset..O::default(),
+            span: tag_offset..O::default(),
+        };
+        self.current_token = Some(Token::EndTag(tag));
+    }
+
+    /// Begins a comment whose data starts at `data_start_offset`.
+    fn init_comment(&mut self, data_start_offset: O) {
+        let comment = Comment {
+            data: String::new(),
+            data_span: data_start_offset..O::default(),
+        };
+        self.current_token = Some(Token::Comment(comment));
+    }
+
+    /// Finishes the current tag: flushes the pending attribute, sets the end
+    /// of the tag span and queues the token.
+    ///
+    /// For an end tag that collected attributes, an `EndTagWithAttributes`
+    /// error is queued before the tag itself.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no token is under construction.
+    fn emit_current_tag(&mut self, offset: O) {
+        self.flush_current_attribute();
+        let mut token = self.current_token.take().unwrap();
+        match &mut token {
+            Token::StartTag(StartTag { span, .. }) => span.end = offset,
+            Token::EndTag(EndTag { span, .. }) => {
+                if !self.seen_attributes.is_empty() {
+                    let attr_span = self.attr_in_end_tag_span.take().unwrap();
+                    self.push_error(Error::EndTagWithAttributes, attr_span);
+                }
+                self.seen_attributes.clear();
+                span.end = offset;
+            }
+            _ => debug_assert!(false),
+        }
+        self.emit_token(token);
+    }
+
+    /// Sets the end of the comment's data span and queues the comment.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no token is under construction.
+    fn emit_current_comment(&mut self, data_end_offset: O) {
+        let mut token = self.current_token.take().unwrap();
+        match &mut token {
+            Token::Comment(Comment { data_span, .. }) => data_span.end = data_end_offset,
+            _ => debug_assert!(false),
+        }
+        self.emit_token(token);
+    }
+
+    /// Sets the end of the doctype span and queues the doctype.
+    fn emit_current_doctype(&mut self, offset: O) {
+        match self.current_token.take() {
+            Some(Token::Doctype(mut doctype)) => {
+                doctype.span.end = offset;
+                self.emit_token(Token::Doctype(doctype));
+            }
+            _ => debug_assert!(false),
+        }
+    }
+
+    /// Marks the current start tag as self-closing; on an end tag an
+    /// `EndTagWithTrailingSolidus` error is queued for `slash_span` instead.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no token is under construction.
+    fn set_self_closing(&mut self, slash_span: Range<O>) {
+        match self.current_token.as_mut().unwrap() {
+            Token::StartTag(tag) => tag.self_closing = true,
+            Token::EndTag(_) => self.emit_error(Error::EndTagWithTrailingSolidus, slash_span),
+            _ => debug_assert!(false),
+        }
+    }
+
+    /// Sets the force-quirks flag of the current doctype.
+    fn set_force_quirks(&mut self) {
+        if let Some(Token::Doctype(doctype)) = &mut self.current_token {
+            doctype.force_quirks = true;
+        } else {
+            debug_assert!(false);
+        }
+    }
+
+    /// Appends `s` to the name of the current start or end tag.
+    fn push_tag_name(&mut self, s: &str) {
+        match &mut self.current_token {
+            Some(
+                Token::StartTag(StartTag { name, .. }) | Token::EndTag(EndTag { name, .. }),
+            ) => name.push_str(s),
+            _ => debug_assert!(false),
+        }
+    }
+
+    /// Sets the end of the name span of the current start or end tag.
+    fn terminate_tag_name(&mut self, offset: O) {
+        match &mut self.current_token {
+            Some(
+                Token::StartTag(StartTag { name_span, .. })
+                | Token::EndTag(EndTag { name_span, .. }),
+            ) => name_span.end = offset,
+            _ => debug_assert!(false),
+        }
+    }
+
+    /// Appends `s` to the data of the current comment.
+    fn push_comment(&mut self, s: &str) {
+        if let Some(Token::Comment(comment)) = &mut self.current_token {
+            comment.data.push_str(s);
+        } else {
+            debug_assert!(false);
+        }
+    }
+
+    /// Gives the current doctype an empty name that starts at `offset`.
+    fn init_doctype_name(&mut self, offset: O) {
+        match &mut self.current_token {
+            Some(Token::Doctype(doctype)) => {
+                doctype.name_span.start = offset;
+                doctype.name = Some("".into());
+            }
+            _ => debug_assert!(false),
+        }
+    }
+
+    /// Appends `s` to the name of the current doctype; the name must have
+    /// been initialized first.
+    fn push_doctype_name(&mut self, s: &str) {
+        if let Some(Token::Doctype(Doctype {
+            name: Some(name), ..
+        })) = &mut self.current_token
+        {
+            name.push_str(s);
+        } else {
+            debug_assert!(false);
+        }
+    }
+
+    /// Sets the end of the name span of the current doctype.
+    fn terminate_doctype_name(&mut self, offset: O) {
+        match &mut self.current_token {
+            Some(Token::Doctype(doctype)) => doctype.name_span.end = offset,
+            _ => debug_assert!(false),
+        }
+    }
+
+    /// Begins a doctype without name or identifiers. Every span bound other
+    /// than the start of the doctype itself is an `O::default()` placeholder.
+    fn init_doctype(&mut self, offset: O) {
+        let doctype = Doctype {
+            span: offset..O::default(),
+            force_quirks: false,
+            name: None,
+            name_span: O::default()..O::default(),
+            public_id: None,
+            public_id_span: O::default()..O::default(),
+            system_id: None,
+            system_id_span: O::default()..O::default(),
+        };
+        self.current_token = Some(Token::Doctype(doctype));
+    }
+
+    /// Flushes the previous attribute, then begins a new one whose name
+    /// starts at `offset`.
+    fn init_attribute_name(&mut self, offset: O) {
+        self.flush_current_attribute();
+        let attr = crate::token::AttrInternal {
+            name_span: offset..O::default(),
+            value: String::new(),
+            value_span: O::default()..O::default(),
+            value_syntax: None,
+        };
+        self.current_attribute = Some((String::new(), attr));
+    }
+
+    /// Records the syntax and start offset of the current attribute's value.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no attribute is under construction.
+    fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {
+        let attr = &mut self.current_attribute.as_mut().unwrap().1;
+        attr.value_syntax = Some(syntax);
+        attr.value_span.start = offset;
+    }
+
+    /// Appends `s` to the current attribute's name.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no attribute is under construction.
+    fn push_attribute_name(&mut self, s: &str) {
+        self.current_attribute.as_mut().unwrap().0.push_str(s);
+    }
+
+    /// Sets the end of the current attribute's name span.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no attribute is under construction.
+    fn terminate_attribute_name(&mut self, offset: O) {
+        let (_, attr) = self.current_attribute.as_mut().unwrap();
+        attr.name_span.end = offset;
+    }
+
+    /// Appends `s` to the current attribute's value.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no attribute is under construction.
+    fn push_attribute_value(&mut self, s: &str) {
+        let (_, attr) = self.current_attribute.as_mut().unwrap();
+        attr.value.push_str(s);
+    }
+
+    /// Sets the end of the current attribute's value span.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no attribute is under construction.
+    fn terminate_attribute_value(&mut self, offset: O) {
+        let (_, attr) = self.current_attribute.as_mut().unwrap();
+        attr.value_span.end = offset;
+    }
+
+    /// Gives the current doctype an empty public identifier that starts at
+    /// `offset`.
+    fn init_doctype_public_id(&mut self, offset: O) {
+        match &mut self.current_token {
+            Some(Token::Doctype(doctype)) => {
+                doctype.public_id_span.start = offset;
+                doctype.public_id = Some(String::new());
+            }
+            _ => debug_assert!(false),
+        }
+    }
+
+    /// Gives the current doctype an empty system identifier that starts at
+    /// `offset`.
+    fn init_doctype_system_id(&mut self, offset: O) {
+        match &mut self.current_token {
+            Some(Token::Doctype(doctype)) => {
+                doctype.system_id_span.start = offset;
+                doctype.system_id = Some(String::new());
+            }
+            _ => debug_assert!(false),
+        }
+    }
+
+    /// Appends `s` to the doctype's public identifier; the identifier must
+    /// have been initialized first.
+    fn push_doctype_public_id(&mut self, s: &str) {
+        let Some(Token::Doctype(Doctype {
+            public_id: Some(id),
+            ..
+        })) = &mut self.current_token
+        else {
+            debug_assert!(false);
+            return;
+        };
+        id.push_str(s);
+    }
+
+    /// Sets the end of the doctype's public identifier span.
+    fn terminate_doctype_public_id(&mut self, offset: O) {
+        match &mut self.current_token {
+            Some(Token::Doctype(doctype)) => doctype.public_id_span.end = offset,
+            _ => debug_assert!(false),
+        }
+    }
+
+    /// Appends `s` to the doctype's system identifier; the identifier must
+    /// have been initialized first.
+    fn push_doctype_system_id(&mut self, s: &str) {
+        let Some(Token::Doctype(Doctype {
+            system_id: Some(id),
+            ..
+        })) = &mut self.current_token
+        else {
+            debug_assert!(false);
+            return;
+        };
+        id.push_str(s);
+    }
+
+    /// Sets the end of the doctype's system identifier span.
+    fn terminate_doctype_system_id(&mut self, offset: O) {
+        match &mut self.current_token {
+            Some(Token::Doctype(doctype)) => doctype.system_id_span.end = offset,
+            _ => debug_assert!(false),
+        }
+    }
+}
+
+impl<O> DefaultEmitter<O> {
+    /// Queues a finished token, flushing buffered characters first so that
+    /// they are popped before it.
+    fn emit_token(&mut self, token: Token<O>) {
+        self.flush_current_characters();
+        self.emitted_tokens.push_front(token);
+    }
+
+    /// Moves the attribute under construction, if any, into the current tag.
+    ///
+    /// On a start tag the attribute is stored unless its name is already
+    /// present. On an end tag only its name and name span are recorded. In
+    /// both cases a repeated name queues a `DuplicateAttribute` error.
+    fn flush_current_attribute(&mut self)
+    where
+        O: Offset,
+    {
+        let Some((name, attr)) = self.current_attribute.take() else {
+            return;
+        };
+        match self.current_token {
+            Some(Token::EndTag(_)) => {
+                let first_occurrence = self.seen_attributes.insert(name);
+                self.attr_in_end_tag_span = Some(attr.name_span.clone());
+                if !first_occurrence {
+                    self.push_error(Error::DuplicateAttribute, attr.name_span);
+                }
+            }
+            Some(Token::StartTag(ref mut tag)) => match tag.attributes.inner.entry(name) {
+                Entry::Occupied(_) => {
+                    self.push_error(Error::DuplicateAttribute, attr.name_span);
+                }
+                Entry::Vacant(slot) => {
+                    slot.insert(attr);
+                }
+            },
+            _ => {
+                debug_assert!(false);
+            }
+        }
+    }
+
+    /// Turns buffered character data, if any, into a `Token::String`.
+    fn flush_current_characters(&mut self) {
+        if !self.current_characters.is_empty() {
+            // The buffer is emptied before `emit_token` runs, so the flush that
+            // `emit_token` performs in turn finds nothing left to do.
+            let text = mem::take(&mut self.current_characters);
+            self.emit_token(Token::String(text));
+        }
+    }
+
+    /// Queues an error token for `span`.
+    fn push_error(&mut self, error: Error, span: Range<O>) {
+        // bypass character flushing in self.emit_token: we don't need the error location to be
+        // that exact
+        let token = Token::Error { error, span };
+        self.emitted_tokens.push_front(token);
+    }
+}
+
+/// Most [`DefaultEmitter`] behavior is covered by the html5lib-tests that the
+/// html5lib integration test runs. Only details absent from that test data
+/// are tested here.
+#[cfg(test)]
+mod tests {
+    use super::DefaultEmitter;
+    use crate::token::{AttrValueSyntax, Token};
+    use crate::{Event, Tokenizer};
+
+    #[test]
+    fn test_attribute_value_syntax() {
+        let html = "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">";
+        let mut events = Tokenizer::new(html, DefaultEmitter::default()).flatten();
+        let Event::Token(Token::StartTag(start_tag)) = events.next().unwrap() else {
+            panic!("expected start tag");
+        };
+
+        // One attribute per value syntax, in the order they appear in `html`.
+        let expected = [
+            ("empty", None),
+            ("unquoted", Some(AttrValueSyntax::Unquoted)),
+            ("single-quoted", Some(AttrValueSyntax::SingleQuoted)),
+            ("double-quoted", Some(AttrValueSyntax::DoubleQuoted)),
+        ];
+        for (name, syntax) in expected {
+            assert_eq!(start_tag.attributes.get(name).unwrap().value_syntax(), syntax);
+        }
+    }
+}
diff --git a/src/emitter.rs b/src/emitter.rs
index 23f9ede..311c73c 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -1,12 +1,6 @@
-use std::collections::btree_map::Entry;
-use std::collections::BTreeSet;
-use std::collections::VecDeque;
-use std::mem;
use std::ops::Range;
-use crate::offset::NoopOffset;
-use crate::offset::Offset;
-use crate::token::{AttrValueSyntax, Comment, Doctype, EndTag, StartTag, Token};
+use crate::token::AttrValueSyntax;
use crate::Error;
/// An emitter is an object providing methods to the tokenizer to produce ("emit") tokens.
@@ -193,419 +187,3 @@ pub trait Emitter<O> {
/// [`push_doctype_system_id`]: Self::push_doctype_system_id
fn terminate_doctype_system_id(&mut self, offset: O) {}
}
-
-/// The default implementation of [`Emitter`], used to produce tokens.
-///
-/// # Warning
-///
-/// * Using the DefaultEmitter without calling [`Tokenizer::set_state`]
-/// results in wrong state transitions:
-///
-/// ```
-/// # use html5tokenizer::{DefaultEmitter, Event, Tokenizer, Token};
-/// let emitter = DefaultEmitter::default();
-/// let html = "<script><b>";
-/// let mut tokens = Tokenizer::new(html, emitter).flatten();
-/// assert!(matches!(tokens.next().unwrap(), Event::Token(Token::StartTag(tag)) if tag.name == "script"));
-/// assert!(matches!(tokens.next().unwrap(), Event::Token(Token::StartTag(tag)) if tag.name == "b"));
-/// ```
-///
-/// [`Tokenizer::set_state`]: crate::Tokenizer::set_state
-pub struct DefaultEmitter<O = NoopOffset> {
- current_characters: String,
- current_token: Option<Token<O>>,
- current_attribute: Option<(String, crate::token::AttrInternal<O>)>,
- seen_attributes: BTreeSet<String>,
- emitted_tokens: VecDeque<Token<O>>,
- attr_in_end_tag_span: Option<Range<O>>,
-}
-
-impl<O> Default for DefaultEmitter<O> {
- fn default() -> Self {
- DefaultEmitter {
- current_characters: String::new(),
- current_token: None,
- current_attribute: None,
- seen_attributes: BTreeSet::new(),
- emitted_tokens: VecDeque::new(),
- attr_in_end_tag_span: None,
- }
- }
-}
-
-impl<O> DefaultEmitter<O> {
- fn emit_token(&mut self, token: Token<O>) {
- self.flush_current_characters();
- self.emitted_tokens.push_front(token);
- }
-
- fn flush_current_attribute(&mut self)
- where
- O: Offset,
- {
- if let Some((name, map_val)) = self.current_attribute.take() {
- match self.current_token {
- Some(Token::StartTag(ref mut tag)) => match tag.attributes.inner.entry(name) {
- Entry::Vacant(vacant) => {
- vacant.insert(map_val);
- }
- Entry::Occupied(_) => {
- self.push_error(Error::DuplicateAttribute, map_val.name_span);
- }
- },
- Some(Token::EndTag(_)) => {
- self.attr_in_end_tag_span = Some(map_val.name_span.clone());
- if !self.seen_attributes.insert(name) {
- self.push_error(Error::DuplicateAttribute, map_val.name_span);
- }
- }
- _ => {
- debug_assert!(false);
- }
- }
- }
- }
-
- fn flush_current_characters(&mut self) {
- if self.current_characters.is_empty() {
- return;
- }
-
- let s = mem::take(&mut self.current_characters);
- self.emit_token(Token::String(s));
- }
-
- fn push_error(&mut self, error: Error, span: Range<O>) {
- // bypass character flushing in self.emit_token: we don't need the error location to be
- // that exact
- self.emitted_tokens.push_front(Token::Error { error, span });
- }
-}
-
-impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
- type Token = Token<O>;
-
- fn emit_eof(&mut self) {
- self.flush_current_characters();
- }
-
- fn emit_error(&mut self, error: Error, span: Range<O>) {
- self.push_error(error, span);
- }
-
- fn pop_token(&mut self) -> Option<Self::Token> {
- self.emitted_tokens.pop_back()
- }
-
- fn emit_string(&mut self, s: &str) {
- self.current_characters.push_str(s);
- }
-
- fn init_start_tag(&mut self, tag_offset: O, name_offset: O) {
- self.current_token = Some(Token::StartTag(StartTag {
- span: tag_offset..O::default(),
- self_closing: false,
- name: String::new(),
- attributes: Default::default(),
- name_span: name_offset..O::default(),
- }));
- }
- fn init_end_tag(&mut self, tag_offset: O, name_offset: O) {
- self.current_token = Some(Token::EndTag(EndTag {
- span: tag_offset..O::default(),
- name: String::new(),
- name_span: name_offset..O::default(),
- }));
- self.seen_attributes.clear();
- }
-
- fn init_comment(&mut self, data_start_offset: O) {
- self.current_token = Some(Token::Comment(Comment {
- data: String::new(),
- data_span: data_start_offset..O::default(),
- }));
- }
- fn emit_current_tag(&mut self, offset: O) {
- self.flush_current_attribute();
- let mut token = self.current_token.take().unwrap();
- match &mut token {
- Token::EndTag(tag) => {
- if !self.seen_attributes.is_empty() {
- let span = self.attr_in_end_tag_span.take().unwrap();
- self.push_error(Error::EndTagWithAttributes, span);
- }
- self.seen_attributes.clear();
- tag.span.end = offset;
- }
- Token::StartTag(tag) => {
- tag.span.end = offset;
- }
- _ => debug_assert!(false),
- }
- self.emit_token(token);
- }
- fn emit_current_comment(&mut self, data_end_offset: O) {
- let mut token = self.current_token.take().unwrap();
- if let Token::Comment(comment) = &mut token {
- comment.data_span.end = data_end_offset;
- } else {
- debug_assert!(false);
- }
- self.emit_token(token);
- }
-
- fn emit_current_doctype(&mut self, offset: O) {
- let Some(Token::Doctype(mut doctype)) = self.current_token.take() else {
- debug_assert!(false);
- return;
- };
- doctype.span.end = offset;
- self.emit_token(Token::Doctype(doctype));
- }
-
- fn set_self_closing(&mut self, slash_span: Range<O>) {
- let tag = self.current_token.as_mut().unwrap();
- match tag {
- Token::StartTag(StartTag {
- ref mut self_closing,
- ..
- }) => {
- *self_closing = true;
- }
- Token::EndTag(_) => {
- self.emit_error(Error::EndTagWithTrailingSolidus, slash_span);
- }
- _ => {
- debug_assert!(false);
- }
- }
- }
- fn set_force_quirks(&mut self) {
- match self.current_token {
- Some(Token::Doctype(ref mut doctype)) => doctype.force_quirks = true,
- _ => debug_assert!(false),
- }
- }
- fn push_tag_name(&mut self, s: &str) {
- match self.current_token {
- Some(Token::StartTag(StartTag { ref mut name, .. })) => {
- name.push_str(s);
- }
- Some(Token::EndTag(EndTag { ref mut name, .. })) => {
- name.push_str(s);
- }
- _ => debug_assert!(false),
- }
- }
-
- fn terminate_tag_name(&mut self, offset: O) {
- match self.current_token {
- Some(Token::StartTag(StartTag {
- ref mut name_span, ..
- })) => {
- name_span.end = offset;
- }
- Some(Token::EndTag(EndTag {
- ref mut name_span, ..
- })) => {
- name_span.end = offset;
- }
- _ => debug_assert!(false),
- }
- }
-
- fn push_comment(&mut self, s: &str) {
- match self.current_token {
- Some(Token::Comment(Comment { ref mut data, .. })) => data.push_str(s),
- _ => debug_assert!(false),
- }
- }
-
- fn init_doctype_name(&mut self, offset: O) {
- let Some(Token::Doctype(doctype)) = &mut self.current_token else {
- debug_assert!(false);
- return;
- };
- doctype.name = Some("".into());
- doctype.name_span.start = offset;
- }
-
- fn push_doctype_name(&mut self, s: &str) {
- match self.current_token {
- Some(Token::Doctype(Doctype {
- name: Some(ref mut name),
- ..
- })) => name.push_str(s),
- _ => debug_assert!(false),
- }
- }
-
- fn terminate_doctype_name(&mut self, offset: O) {
- let Some(Token::Doctype(doctype)) = &mut self.current_token else {
- debug_assert!(false);
- return;
- };
- doctype.name_span.end = offset;
- }
-
- fn init_doctype(&mut self, offset: O) {
- self.current_token = Some(Token::Doctype(Doctype {
- name: None,
- force_quirks: false,
- public_id: None,
- system_id: None,
- span: offset..O::default(),
- name_span: O::default()..O::default(),
- public_id_span: O::default()..O::default(),
- system_id_span: O::default()..O::default(),
- }));
- }
-
- fn init_attribute_name(&mut self, offset: O) {
- self.flush_current_attribute();
- self.current_attribute = Some((
- String::new(),
- crate::token::AttrInternal {
- name_span: offset..O::default(),
- value: String::new(),
- value_span: O::default()..O::default(),
- value_syntax: None,
- },
- ));
- }
- fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {
- let (_, current_attribute) = self.current_attribute.as_mut().unwrap();
- current_attribute.value_span.start = offset;
- current_attribute.value_syntax = Some(syntax);
- }
-
- fn push_attribute_name(&mut self, s: &str) {
- let current_attr = self.current_attribute.as_mut().unwrap();
- current_attr.0.push_str(s);
- }
-
- fn terminate_attribute_name(&mut self, offset: O) {
- let current_attr = self.current_attribute.as_mut().unwrap();
- current_attr.1.name_span.end = offset;
- }
-
- fn push_attribute_value(&mut self, s: &str) {
- let current_attr = self.current_attribute.as_mut().unwrap();
- current_attr.1.value.push_str(s);
- }
-
- fn terminate_attribute_value(&mut self, offset: O) {
- let current_attr = self.current_attribute.as_mut().unwrap();
- current_attr.1.value_span.end = offset;
- }
-
- fn init_doctype_public_id(&mut self, offset: O) {
- let Some(Token::Doctype(doctype)) = &mut self.current_token else {
- debug_assert!(false);
- return;
- };
- doctype.public_id = Some("".to_owned());
- doctype.public_id_span.start = offset;
- }
- fn init_doctype_system_id(&mut self, offset: O) {
- let Some(Token::Doctype(doctype)) = &mut self.current_token else {
- debug_assert!(false);
- return;
- };
- doctype.system_id = Some("".to_owned());
- doctype.system_id_span.start = offset;
- }
- fn push_doctype_public_id(&mut self, s: &str) {
- if let Some(Token::Doctype(Doctype {
- public_id: Some(ref mut id),
- ..
- })) = self.current_token
- {
- id.push_str(s);
- } else {
- debug_assert!(false);
- }
- }
-
- fn terminate_doctype_public_id(&mut self, offset: O) {
- if let Some(Token::Doctype(Doctype {
- ref mut public_id_span,
- ..
- })) = self.current_token
- {
- public_id_span.end = offset;
- } else {
- debug_assert!(false);
- }
- }
-
- fn push_doctype_system_id(&mut self, s: &str) {
- if let Some(Token::Doctype(Doctype {
- system_id: Some(ref mut id),
- ..
- })) = self.current_token
- {
- id.push_str(s);
- } else {
- debug_assert!(false);
- }
- }
-
- fn terminate_doctype_system_id(&mut self, offset: O) {
- if let Some(Token::Doctype(Doctype {
- ref mut system_id_span,
- ..
- })) = self.current_token
- {
- system_id_span.end = offset;
- } else {
- debug_assert!(false);
- }
- }
-}
-
-/// The majority of our testing of the [`DefaultEmitter`] is done against the
-/// html5lib-tests in the html5lib integration test. This module only tests
-/// details that aren't present in the html5lib test data.
-#[cfg(test)]
-mod tests {
- use super::DefaultEmitter;
- use crate::token::{AttrValueSyntax, Token};
- use crate::{Event, Tokenizer};
-
- #[test]
- fn test_attribute_value_syntax() {
- let mut tokenizer = Tokenizer::new(
- "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">",
- DefaultEmitter::default(),
- )
- .flatten();
- let Event::Token(Token::StartTag(start_tag)) = tokenizer.next().unwrap() else {
- panic!("expected start tag");
- };
- assert_eq!(
- start_tag.attributes.get("empty").unwrap().value_syntax(),
- None
- );
- assert_eq!(
- start_tag.attributes.get("unquoted").unwrap().value_syntax(),
- Some(AttrValueSyntax::Unquoted)
- );
- assert_eq!(
- start_tag
- .attributes
- .get("single-quoted")
- .unwrap()
- .value_syntax(),
- Some(AttrValueSyntax::SingleQuoted)
- );
- assert_eq!(
- start_tag
- .attributes
- .get("double-quoted")
- .unwrap()
- .value_syntax(),
- Some(AttrValueSyntax::DoubleQuoted)
- );
- }
-}
diff --git a/src/lib.rs b/src/lib.rs
index 2918c80..194add9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,6 +7,7 @@
#![doc = concat!("[the LICENSE file]: ", file_url!("LICENSE"))]
#![doc = include_str!("../README.md")]
+mod default_emitter;
mod emitter;
mod entities;
mod error;
@@ -25,7 +26,8 @@ pub mod offset;
pub mod reader;
pub mod token;
-pub use emitter::{DefaultEmitter, Emitter};
+pub use default_emitter::DefaultEmitter;
+pub use emitter::Emitter;
pub use error::Error;
pub use naive_parser::NaiveParser;
pub use token::{Comment, Doctype, EndTag, StartTag, Token};
diff --git a/src/naive_parser.rs b/src/naive_parser.rs
index f42072a..c305343 100644
--- a/src/naive_parser.rs
+++ b/src/naive_parser.rs
@@ -1,4 +1,4 @@
-use crate::emitter::DefaultEmitter;
+use crate::default_emitter::DefaultEmitter;
use crate::offset::{Offset, Position};
use crate::reader::{IntoReader, Reader};
use crate::tokenizer::CdataAction;