about summary refs log tree commit diff
path: root/src/default_emitter.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/default_emitter.rs')
-rw-r--r-- src/default_emitter.rs 344
1 files changed, 0 insertions, 344 deletions
diff --git a/src/default_emitter.rs b/src/default_emitter.rs
deleted file mode 100644
index 7b6c51e..0000000
--- a/src/default_emitter.rs
+++ /dev/null
@@ -1,344 +0,0 @@
-use std::collections::btree_map::Entry;
-use std::collections::BTreeSet;
-use std::collections::VecDeque;
-use std::ops::Range;
-
-use crate::let_else::assume;
-use crate::offset::NoopOffset;
-use crate::offset::Offset;
-use crate::token::{AttrValueSyntax, Comment, Doctype, EndTag, StartTag, Token};
-use crate::Emitter;
-use crate::Error;
-
-/// Default [`Emitter`] implementation: it assembles [`Token`]s from the
-/// tokenizer's callbacks and queues them, alongside any reported errors.
-///
-/// `O` is the offset type stored in token and error spans; when not
-/// specified it is [`NoopOffset`].
-pub struct DefaultEmitter<O = NoopOffset> {
-    /// Token currently under construction, if any.
-    current_token: Option<Token<O>>,
-    /// Name of the attribute currently being read; empty when there is none.
-    current_attribute_name: String,
-    /// Value, value syntax and spans of the attribute currently being read.
-    current_attr_internal: crate::token::AttrInternal<O>,
-    /// Attribute names collected for the end tag under construction.
-    seen_attributes: BTreeSet<String>,
-    /// Finished tokens waiting to be handed out through the `Iterator` impl.
-    emitted_tokens: VecDeque<Token<O>>,
-    /// Reported errors, each paired with the span it refers to.
-    errors: VecDeque<(Error, Range<O>)>,
-    /// Name span of the most recently flushed attribute of an end tag.
-    attr_in_end_tag_span: Option<Range<O>>,
-}
-
-impl<O: Default> Default for DefaultEmitter<O> {
-    /// Builds an emitter with no token in progress, empty token and error
-    /// queues, and a default-initialized pending attribute.
-    fn default() -> Self {
-        Self {
-            current_token: None,
-            current_attribute_name: String::default(),
-            current_attr_internal: Default::default(),
-            seen_attributes: BTreeSet::default(),
-            emitted_tokens: VecDeque::default(),
-            errors: VecDeque::default(),
-            attr_in_end_tag_span: None,
-        }
-    }
-}
-
-impl<O> DefaultEmitter<O> {
-    /// Drains the queue of tokenizer errors reported so far.
-    ///
-    /// The returned iterator yields each error with its span, oldest first,
-    /// and borrows the emitter mutably for as long as it is alive.
-    pub fn drain_errors(&mut self) -> impl Iterator<Item = (Error, Range<O>)> + '_ {
-        self.errors.drain(..)
-    }
-}
-
-impl<O> Iterator for DefaultEmitter<O> {
-    type Item = Token<O>;
-
-    /// Pops the next queued token from the back of the token queue, or
-    /// returns `None` when no token is currently queued.
-    fn next(&mut self) -> Option<Self::Item> {
-        let queue = &mut self.emitted_tokens;
-        queue.pop_back()
-    }
-}
-
-impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
-    /// Appends `error` and its `span` to the error queue.
-    fn report_error(&mut self, error: Error, span: Range<O>) {
-        self.errors.push_back((error, span));
-    }
-
-    /// Queues a [`Token::Char`] carrying `c`.
-    fn emit_char(&mut self, c: char) {
-        let token = Token::Char(c);
-        self.emit_token(token);
-    }
-
-    /// Queues a [`Token::EndOfFile`].
-    fn emit_eof(&mut self) {
-        let token = Token::EndOfFile;
-        self.emit_token(token);
-    }
-
-    /// Makes a fresh, empty start tag the current token.
-    ///
-    /// Only the start of `span` and `name_span` is known at this point; both
-    /// ends are placeholders (`O::default()`) until they are terminated.
-    fn init_start_tag(&mut self, tag_offset: O, name_offset: O) {
-        let tag = StartTag {
-            span: tag_offset..O::default(),
-            self_closing: false,
-            name: String::new(),
-            attributes: Default::default(),
-            name_span: name_offset..O::default(),
-        };
-        self.current_token = Some(Token::StartTag(tag));
-    }
-
-    /// Makes a fresh, empty end tag the current token and forgets the
-    /// attribute names collected for any previous end tag.
-    fn init_end_tag(&mut self, tag_offset: O, name_offset: O) {
-        let tag = EndTag {
-            span: tag_offset..O::default(),
-            name: String::new(),
-            name_span: name_offset..O::default(),
-        };
-        self.current_token = Some(Token::EndTag(tag));
-        self.seen_attributes.clear();
-    }
-
-    /// Appends `s` to the name of the start or end tag under construction.
-    fn push_tag_name(&mut self, s: &str) {
-        assume!(
-            Some(
-                Token::StartTag(StartTag { name: tag_name, .. })
-                    | Token::EndTag(EndTag { name: tag_name, .. })
-            ),
-            &mut self.current_token
-        );
-        tag_name.push_str(s);
-    }
-
-    /// Sets the end of the name span of the start or end tag under
-    /// construction to `offset`.
-    fn terminate_tag_name(&mut self, offset: O) {
-        assume!(
-            Some(
-                Token::StartTag(StartTag { name_span: range, .. })
-                    | Token::EndTag(EndTag { name_span: range, .. })
-            ),
-            &mut self.current_token
-        );
-        range.end = offset;
-    }
-
-    /// Flushes any pending attribute, then records `offset` as the start of
-    /// the next attribute's name span.
-    fn init_attribute_name(&mut self, offset: O) {
-        self.flush_current_attribute();
-        let pending = &mut self.current_attr_internal;
-        pending.name_span.start = offset;
-    }
-
-    /// Appends `s` to the name of the pending attribute.
-    fn push_attribute_name(&mut self, s: &str) {
-        let pending_name = &mut self.current_attribute_name;
-        pending_name.push_str(s);
-    }
-
-    /// Records `offset` as the end of the pending attribute's name span.
-    fn terminate_attribute_name(&mut self, offset: O) {
-        let pending = &mut self.current_attr_internal;
-        pending.name_span.end = offset;
-    }
-
-    /// Records where the pending attribute's value starts and which
-    /// `syntax` it was written in.
-    fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {
-        let pending = &mut self.current_attr_internal;
-        pending.value_span.start = offset;
-        pending.value_syntax = Some(syntax);
-    }
-
-    /// Appends `s` to the value of the pending attribute.
-    fn push_attribute_value(&mut self, s: &str) {
-        let pending = &mut self.current_attr_internal;
-        pending.value.push_str(s);
-    }
-
-    /// Records `offset` as the end of the pending attribute's value span.
-    fn terminate_attribute_value(&mut self, offset: O) {
-        let pending = &mut self.current_attr_internal;
-        pending.value_span.end = offset;
-    }
-
-    /// Handles a trailing solidus on the current tag.
-    ///
-    /// A start tag is flagged as self-closing. For an end tag,
-    /// [`Error::EndTagWithTrailingSolidus`] is reported at `slash_span`
-    /// instead.
-    ///
-    /// # Panics
-    ///
-    /// Panics if there is no current token.
-    fn set_self_closing(&mut self, slash_span: Range<O>) {
-        let current = self.current_token.as_mut().unwrap();
-
-        match current {
-            Token::StartTag(start) => start.self_closing = true,
-            Token::EndTag(_) => {
-                self.report_error(Error::EndTagWithTrailingSolidus, slash_span)
-            }
-            other => debug_assert!(false, "unexpected current_token: {other:?}"),
-        }
-    }
-
-    /// Finishes the current start or end tag at `offset` and queues it.
-    ///
-    /// The pending attribute is flushed first. If an end tag collected any
-    /// attributes, [`Error::EndTagWithAttributes`] is reported with the
-    /// stored attribute span. A current token that is not a tag is dropped
-    /// without being queued (and trips a debug assertion).
-    ///
-    /// # Panics
-    ///
-    /// Panics if there is no current token.
-    fn emit_current_tag(&mut self, offset: O) {
-        self.flush_current_attribute();
-        let mut finished = self.current_token.take().unwrap();
-        match &mut finished {
-            Token::StartTag(start) => {
-                start.span.end = offset;
-            }
-            Token::EndTag(end) => {
-                if !self.seen_attributes.is_empty() {
-                    let attr_span = self.attr_in_end_tag_span.take().unwrap();
-                    self.report_error(Error::EndTagWithAttributes, attr_span);
-                }
-                self.seen_attributes.clear();
-                end.span.end = offset;
-            }
-            other => {
-                debug_assert!(false, "unexpected current_token: {other:?}");
-                return;
-            }
-        }
-        self.emit_token(finished);
-    }
-
-    /// Makes a fresh, empty comment the current token; its data span starts
-    /// at `data_start_offset` and its end is a placeholder for now.
-    fn init_comment(&mut self, data_start_offset: O) {
-        let comment = Comment {
-            data: String::new(),
-            data_span: data_start_offset..O::default(),
-        };
-        self.current_token = Some(Token::Comment(comment));
-    }
-
-    /// Appends `s` to the data of the comment under construction.
-    fn push_comment(&mut self, s: &str) {
-        assume!(Some(Token::Comment(current)), &mut self.current_token);
-        current.data.push_str(s);
-    }
-
-    /// Ends the current comment's data span at `data_end_offset` and queues
-    /// the comment.
-    ///
-    /// # Panics
-    ///
-    /// Panics if there is no current token.
-    fn emit_current_comment(&mut self, data_end_offset: O) {
-        let mut finished = self.current_token.take().unwrap();
-        assume!(Token::Comment(current), &mut finished);
-        current.data_span.end = data_end_offset;
-        self.emit_token(finished);
-    }
-
-    /// Makes a fresh doctype the current token.
-    ///
-    /// Name and identifiers start out as `None`, `force_quirks` as `false`;
-    /// only the start of `span` is known, every other span bound is a
-    /// placeholder (`O::default()`).
-    fn init_doctype(&mut self, offset: O) {
-        let doctype = Doctype {
-            name: None,
-            force_quirks: false,
-            public_id: None,
-            system_id: None,
-            span: offset..O::default(),
-            name_span: O::default()..O::default(),
-            public_id_span: O::default()..O::default(),
-            system_id_span: O::default()..O::default(),
-        };
-        self.current_token = Some(Token::Doctype(doctype));
-    }
-
-    /// Gives the current doctype an empty name whose span starts at `offset`.
-    fn init_doctype_name(&mut self, offset: O) {
-        assume!(Some(Token::Doctype(current)), &mut self.current_token);
-        current.name = Some("".into());
-        current.name_span.start = offset;
-    }
-
-    /// Appends `s` to the name of the current doctype.
-    fn push_doctype_name(&mut self, s: &str) {
-        assume!(
-            Some(Token::Doctype(Doctype {
-                name: Some(doctype_name),
-                ..
-            })),
-            &mut self.current_token
-        );
-        doctype_name.push_str(s);
-    }
-
-    /// Records `offset` as the end of the current doctype's name span.
-    fn terminate_doctype_name(&mut self, offset: O) {
-        assume!(Some(Token::Doctype(current)), &mut self.current_token);
-        current.name_span.end = offset;
-    }
-
-    /// Gives the current doctype an empty public identifier whose span
-    /// starts at `offset`.
-    fn init_doctype_public_id(&mut self, offset: O) {
-        assume!(Some(Token::Doctype(current)), &mut self.current_token);
-        current.public_id = Some(String::new());
-        current.public_id_span.start = offset;
-    }
-
-    /// Appends `s` to the public identifier of the current doctype.
-    fn push_doctype_public_id(&mut self, s: &str) {
-        assume!(
-            Some(Token::Doctype(Doctype {
-                public_id: Some(id),
-                ..
-            })),
-            &mut self.current_token
-        );
-        id.push_str(s);
-    }
-
-    /// Records `offset` as the end of the current doctype's public
-    /// identifier span.
-    fn terminate_doctype_public_id(&mut self, offset: O) {
-        assume!(Some(Token::Doctype(current)), &mut self.current_token);
-        current.public_id_span.end = offset;
-    }
-
-    /// Gives the current doctype an empty system identifier whose span
-    /// starts at `offset`.
-    fn init_doctype_system_id(&mut self, offset: O) {
-        assume!(Some(Token::Doctype(current)), &mut self.current_token);
-        current.system_id = Some(String::new());
-        current.system_id_span.start = offset;
-    }
-
-    /// Appends `s` to the system identifier of the current doctype.
-    fn push_doctype_system_id(&mut self, s: &str) {
-        assume!(
-            Some(Token::Doctype(Doctype {
-                system_id: Some(system_id),
-                ..
-            })),
-            &mut self.current_token
-        );
-        system_id.push_str(s);
-    }
-
-    /// Records `offset` as the end of the current doctype's system
-    /// identifier span.
-    fn terminate_doctype_system_id(&mut self, offset: O) {
-        assume!(Some(Token::Doctype(current)), &mut self.current_token);
-        current.system_id_span.end = offset;
-    }
-
-    /// Turns on the force-quirks flag of the current doctype.
-    fn set_force_quirks(&mut self) {
-        assume!(Some(Token::Doctype(current)), &mut self.current_token);
-        current.force_quirks = true;
-    }
-
-    /// Takes the current doctype, ends its span at `offset` and queues it.
-    fn emit_current_doctype(&mut self, offset: O) {
-        assume!(Some(Token::Doctype(mut finished)), self.current_token.take());
-        finished.span.end = offset;
-        let token = Token::Doctype(finished);
-        self.emit_token(token);
-    }
-}
-
-impl<O> DefaultEmitter<O> {
-    /// Places `token` at the front of the token queue; the `Iterator` impl
-    /// pops from the back, so tokens come out in the order they were queued.
-    fn emit_token(&mut self, token: Token<O>) {
-        self.emitted_tokens.push_front(token);
-    }
-
-    /// Moves the pending attribute, if any, onto the current tag.
-    ///
-    /// Does nothing while the pending attribute name is empty. Otherwise the
-    /// pending name and attribute data are taken (leaving defaults behind)
-    /// and handled according to the current token:
-    ///
-    /// * start tag: the attribute is inserted unless the name is already
-    ///   present, in which case [`Error::DuplicateAttribute`] is reported
-    ///   and the attribute is dropped;
-    /// * end tag: only the name is remembered, the name span is stored for
-    ///   later error reporting, and a repeated name is reported as
-    ///   [`Error::DuplicateAttribute`];
-    /// * anything else trips a debug assertion.
-    fn flush_current_attribute(&mut self)
-    where
-        O: Offset,
-    {
-        if self.current_attribute_name.is_empty() {
-            return;
-        }
-        let attr_name = std::mem::take(&mut self.current_attribute_name);
-        let attr = std::mem::take(&mut self.current_attr_internal);
-
-        match &mut self.current_token {
-            Some(Token::StartTag(start)) => match start.attributes.inner.entry(attr_name) {
-                Entry::Occupied(_) => {
-                    self.report_error(Error::DuplicateAttribute, attr.name_span);
-                }
-                Entry::Vacant(slot) => {
-                    slot.insert(attr);
-                }
-            },
-            Some(Token::EndTag(_)) => {
-                let newly_seen = self.seen_attributes.insert(attr_name);
-                self.attr_in_end_tag_span = Some(attr.name_span.clone());
-                if !newly_seen {
-                    self.report_error(Error::DuplicateAttribute, attr.name_span);
-                }
-            }
-            other => debug_assert!(false, "unexpected current_token: {other:?}"),
-        }
-    }
-}
-
-/// Most of the [`DefaultEmitter`] test coverage comes from the html5lib-tests
-/// run by the html5lib integration test. This module only covers details
-/// that the html5lib test data does not exercise.
-#[cfg(test)]
-mod tests {
-    use super::DefaultEmitter;
-    use crate::token::{AttrValueSyntax, Token};
-    use crate::{Event, Tokenizer};
-
-    /// Each attribute must report the syntax its value was written in, and
-    /// `None` when it has no value at all.
-    #[test]
-    fn test_attribute_value_syntax() {
-        let html = "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">";
-        let mut events = Tokenizer::new(html, DefaultEmitter::default()).flatten();
-
-        let Event::Token(Token::StartTag(tag)) = events.next().unwrap() else {
-            panic!("expected start tag");
-        };
-
-        let expectations = [
-            ("empty", None),
-            ("unquoted", Some(AttrValueSyntax::Unquoted)),
-            ("single-quoted", Some(AttrValueSyntax::SingleQuoted)),
-            ("double-quoted", Some(AttrValueSyntax::DoubleQuoted)),
-        ];
-        for (name, syntax) in expectations {
-            let attribute = tag.attributes.get(name).unwrap();
-            assert_eq!(
-                attribute.value_syntax(),
-                syntax,
-                "unexpected value for attribute {name}"
-            );
-        }
-    }
-}