use std::collections::btree_map::Entry;
use std::collections::BTreeSet;
use std::collections::VecDeque;
use std::ops::Range;

use crate::let_else::assume;
use crate::offset::NoopOffset;
use crate::offset::Offset;
use crate::token::{Doctype, EndTag, StartTag, Token};
use crate::Emitter;
use crate::Error;

/// An [`Emitter`] implementation that yields [`Token`].
pub struct BasicEmitter<O> {
    current_token: Option<Token>,
    current_attribute_name: String,
    current_attr_internal: crate::token::AttrInternal,
    seen_attributes: BTreeSet<String>,
    emitted_tokens: VecDeque<Token>,
    errors: VecDeque<(Error, Range<O>)>,
    attr_name_span: Range<O>,
}

impl Default for BasicEmitter<NoopOffset> {
    fn default() -> Self {
        BasicEmitter {
            current_token: None,
            current_attribute_name: String::new(),
            current_attr_internal: Default::default(),
            seen_attributes: BTreeSet::new(),
            emitted_tokens: VecDeque::new(),
            errors: VecDeque::new(),
            attr_name_span: Default::default(),
        }
    }
}

impl<O> BasicEmitter<O> {
    /// Removes all encountered tokenizer errors and returns them as an iterator.
    pub fn drain_errors(&mut self) -> impl Iterator<Item = (Error, Range<O>)> + '_ {
        self.errors.drain(0..)
    }
}

impl<O> Iterator for BasicEmitter<O> {
    type Item = Token;

    fn next(&mut self) -> Option<Self::Item> {
        self.emitted_tokens.pop_back()
    }
}

#[allow(unused_variables)]
impl<O: Offset> Emitter<O> for BasicEmitter<O> {
    fn report_error(&mut self, error: Error, span: Range<O>) {
        self.errors.push_back((error, span));
    }

    fn emit_char(&mut self, c: char) {
        self.emit_token(Token::Char(c));
    }

    fn emit_eof(&mut self) {
        self.emit_token(Token::EndOfFile);
    }

    fn init_start_tag(&mut self, tag_offset: O, name_offset: O) {
        self.current_token = Some(Token::StartTag(StartTag {
            self_closing: false,
            name: String::new(),
            attributes: Default::default(),
        }));
    }

    fn init_end_tag(&mut self, tag_offset: O, name_offset: O) {
        self.current_token = Some(Token::EndTag(EndTag {
            name: String::new(),
        }));
        self.seen_attributes.clear();
    }

    fn push_tag_name(&mut self, s: &str) {
        assume!(
            Some(Token::StartTag(StartTag { name, .. }) | Token::EndTag(EndTag { name, .. })),
            &mut self.current_token
        );
        name.push_str(s);
    }
    fn init_attribute_name(&mut self, offset: O) {
        self.flush_current_attribute();
        self.attr_name_span.start = offset;
    }

    fn push_attribute_name(&mut self, s: &str) {
        self.current_attribute_name.push_str(s);
    }

    fn terminate_attribute_name(&mut self, offset: O) {
        self.attr_name_span.end = offset;
    }

    fn push_attribute_value(&mut self, s: &str) {
        self.current_attr_internal.value.push_str(s);
    }

    fn set_self_closing(&mut self, slash_span: Range<O>) {
        let token = self.current_token.as_mut().unwrap();
        match token {
            Token::StartTag(tag) => {
                tag.self_closing = true;
            }
            Token::EndTag(_) => {
                self.report_error(Error::EndTagWithTrailingSolidus, slash_span);
            }
            other => debug_assert!(false, "unexpected current_token: {other:?}"),
        }
    }

    fn emit_current_tag(&mut self, offset: O) {
        self.flush_current_attribute();
        let mut token = self.current_token.take().unwrap();
        match &mut token {
            Token::EndTag(_) => {
                if !self.seen_attributes.is_empty() {
                    self.report_error(Error::EndTagWithAttributes, self.attr_name_span.clone());
                }
                self.seen_attributes.clear();
            }
            Token::StartTag(_) => {}
            other => {
                debug_assert!(false, "unexpected current_token: {other:?}");
                return;
            }
        }
        self.emit_token(token);
    }

    fn init_comment(&mut self, data_start_offset: O) {
        self.current_token = Some(Token::Comment(String::new()));
    }

    fn push_comment(&mut self, s: &str) {
        assume!(Some(Token::Comment(data)), &mut self.current_token);
        data.push_str(s);
    }

    fn emit_current_comment(&mut self, data_end_offset: O) {
        let token = self.current_token.take().unwrap();
        self.emit_token(token);
    }

    fn init_doctype(&mut self, offset: O) {
        self.current_token = Some(Token::Doctype(Doctype {
            name: None,
            force_quirks: false,
            public_id: None,
            system_id: None,
        }));
    }

    fn init_doctype_name(&mut self, offset: O) {
        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
        doctype.name = Some("".into());
    }

    fn push_doctype_name(&mut self, s: &str) {
        assume!(
            Some(Token::Doctype(Doctype {
                name: Some(name),
                ..
            })),
            &mut self.current_token
        );
        name.push_str(s);
    }

    fn init_doctype_public_id(&mut self, offset: O) {
        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
        doctype.public_id = Some("".to_owned());
    }

    fn push_doctype_public_id(&mut self, s: &str) {
        assume!(
            Some(Token::Doctype(Doctype {
                public_id: Some(public_id),
                ..
            })),
            &mut self.current_token
        );
        public_id.push_str(s);
    }

    fn init_doctype_system_id(&mut self, offset: O) {
        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
        doctype.system_id = Some("".to_owned());
    }

    fn push_doctype_system_id(&mut self, s: &str) {
        assume!(
            Some(Token::Doctype(Doctype {
                system_id: Some(id),
                ..
            })),
            &mut self.current_token
        );
        id.push_str(s);
    }
    fn set_force_quirks(&mut self) {
        assume!(Some(Token::Doctype(doctype)), &mut self.current_token);
        doctype.force_quirks = true;
    }

    fn emit_current_doctype(&mut self, offset: O) {
        let token = self.current_token.take().unwrap();
        self.emit_token(token);
    }
}

impl<O> BasicEmitter<O> {
    fn emit_token(&mut self, token: Token) {
        // Tokens are pushed to the front and popped from the back by the
        // `Iterator` impl, so they come out in the order they were emitted.
        self.emitted_tokens.push_front(token);
    }

    fn flush_current_attribute(&mut self)
    where
        O: Offset,
    {
        if self.current_attribute_name.is_empty() {
            return;
        }
        let name = std::mem::take(&mut self.current_attribute_name);
        let attr_internal = std::mem::take(&mut self.current_attr_internal);
        match &mut self.current_token {
            Some(Token::StartTag(tag)) => match tag.attributes.inner.entry(name) {
                Entry::Vacant(vacant) => {
                    vacant.insert(attr_internal);
                }
                Entry::Occupied(_) => {
                    self.report_error(Error::DuplicateAttribute, self.attr_name_span.clone());
                }
            },
            Some(Token::EndTag(_)) => {
                // End tags never carry attributes; the attribute is discarded and
                // only its name is remembered so that duplicate-attribute and
                // end-tag-with-attributes errors can still be reported.
                if !self.seen_attributes.insert(name) {
                    self.report_error(Error::DuplicateAttribute, self.attr_name_span.clone());
                }
            }
            other => debug_assert!(false, "unexpected current_token: {other:?}"),
        }
    }
}
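
// A minimal usage sketch: it drives the emitter directly through the `Emitter`
// trait methods implemented above and reads the queued tokens back through the
// `Iterator` impl. It assumes that `NoopOffset` implements `Offset`, `Copy`,
// and `Default`; if the crate's offset types differ, the offset value below
// has to be adapted.
#[cfg(test)]
mod usage_sketch {
    use super::*;

    #[test]
    fn start_tag_then_character() {
        let mut emitter = BasicEmitter::<NoopOffset>::default();
        // Assumed to be a zero-sized placeholder offset.
        let o = NoopOffset::default();

        // Simulate the tokenizer callbacks for `<div class="a">x`.
        emitter.init_start_tag(o, o);
        emitter.push_tag_name("div");
        emitter.init_attribute_name(o);
        emitter.push_attribute_name("class");
        emitter.terminate_attribute_name(o);
        emitter.push_attribute_value("a");
        emitter.emit_current_tag(o);
        emitter.emit_char('x');

        // Tokens come out in the order in which they were emitted.
        assert!(matches!(emitter.next(), Some(Token::StartTag(_))));
        assert!(matches!(emitter.next(), Some(Token::Char('x'))));
        assert!(emitter.next().is_none());

        // No tokenizer errors were reported for this input.
        assert_eq!(emitter.drain_errors().count(), 0);
    }
}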