use std::ops::Range; use crate::trace::AttrValueSyntax; use crate::Error; /// An emitter is an object providing methods to the tokenizer to produce ("emit") tokens. /// /// Domain-specific applications of the HTML tokenizer can manually implement this trait to /// customize per-token allocations, or avoid them altogether. /// /// An emitter is assumed to have these internal states: /// /// * _current token_: Can be a tag, doctype or comment token. There's only one current token. /// * _current attribute_: The currently processed HTML attribute, consisting of two strings for name and value. /// /// The following methods are describing what kind of behavior the WHATWG spec expects, but that /// doesn't mean you need to follow it. For example: /// /// * If your usage of the tokenizer will ignore all errors, none of the error handling and /// validation requirements apply to you. You can implement `report_error` as noop and omit all /// checks that would emit errors. /// /// * If you don't care about attributes at all, you can make all related methods a noop. #[allow(unused_variables)] // workaround for https://github.com/rust-lang/rust/issues/91074 pub trait Emitter { /// Reports a parse error. fn report_error(&mut self, error: Error, span: Range); /// Emits the given character as a character token. fn emit_char(&mut self, char: char, span: Range); /// The state machine has reached the end of the file. fn emit_eof(&mut self, offset: O); /// Set the _current token_ to a start tag. fn init_start_tag(&mut self, tag_offset: O, name_offset: O); /// Set the _current token_ to an end tag. fn init_end_tag(&mut self, tag_offset: O, name_offset: O); /// Assuming the _current token_ is a start/end tag, append a string to the current tag's name. /// /// If the current token is not a start or end tag, this method may panic. fn push_tag_name(&mut self, s: &str); /// Called after the last [`push_tag_name`] call for a tag name. /// /// [`push_tag_name`]: Self::push_tag_name fn terminate_tag_name(&mut self, offset: O) {} /// Set the _current attribute_ to a new one, starting with empty name and value strings. /// /// The old attribute, if any, should be put on the _current token_. If an attribute with that /// name already exists, WHATWG says the new one should be ignored and a /// [`Error::DuplicateAttribute`] error should be emitted. /// /// If the current token is no tag at all, this method may panic. fn init_attribute_name(&mut self, offset: O); /// Append a string to the current attribute's name. /// /// If there is no current attribute, this method may panic. fn push_attribute_name(&mut self, s: &str); /// Called after the last [`push_attribute_name`] call for an attribute name. /// /// [`push_attribute_name`]: Self::push_attribute_name fn terminate_attribute_name(&mut self, offset: O) {} /// Called before the first push_attribute_value call. /// /// If there is no current attribute, this method may panic. fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {} /// Append a string to the current attribute's value. /// /// If there is no current attribute, this method may panic. fn push_attribute_value(&mut self, s: &str); /// Called after the last [`push_attribute_value`] call for an attribute value. /// /// [`push_attribute_value`]: Self::push_attribute_value fn terminate_attribute_value(&mut self, offset: O) {} /// Assuming the _current token_ is a start tag, set the self-closing flag. /// /// If the current token is not a start or end tag, this method may panic. /// /// If the current token is an end tag, the emitter should emit the /// [`Error::EndTagWithTrailingSolidus`] error. fn set_self_closing(&mut self, slash_span: Range); /// Emit the _current token_, assuming it is a tag. /// /// Also get the current attribute and append it to the to-be-emitted tag. See docstring for /// [`Emitter::init_attribute_name`] for how duplicates should be handled. /// /// If an end tag is emitted with attributes, an [`Error::EndTagWithAttributes`] /// error should be emitted. /// /// If the current token is not a start/end tag, this method may panic. fn emit_current_tag(&mut self, offset: O); /// Set the _current token_ to a comment. fn init_comment(&mut self, data_start_offset: O); /// Assuming the _current token_ is a comment, append a string to the comment's contents. /// /// If the current token is not a comment, this method may panic. fn push_comment(&mut self, s: &str); /// Emit the _current token_, assuming it is a comment. /// /// If the current token is not a comment, this method may panic. fn emit_current_comment(&mut self, data_end_offset: O); /// Set the _current token_ to a new doctype token: /// /// * the name should be empty /// * the "public identifier" should be null (different from empty) /// * the "system identifier" should be null (different from empty) /// * the "force quirks" flag should be `false` fn init_doctype(&mut self, offset: O); /// Assuming the _current token_ is a doctype, set its name to the empty string. /// /// If the current token is not a doctype, this method may panic. fn init_doctype_name(&mut self, offset: O) {} /// Assuming the _current token_ is a doctype, append a string to the doctype's name. /// /// If the current token is not a doctype, this method may panic. fn push_doctype_name(&mut self, s: &str); /// Called after the last [`push_doctype_name`] call for a DOCTYPE name. /// /// [`push_doctype_name`]: Self::push_doctype_name fn terminate_doctype_name(&mut self, offset: O) {} /// Assuming the _current token_ is a doctype, set its "public identifier" to the empty string. /// /// If the current token is not a doctype, this method may panic. fn init_doctype_public_id(&mut self, offset: O); /// Assuming the _current token_ is a doctype, append a string to its "public identifier" to the given string. /// /// If the current token is not a doctype, this method may panic. fn push_doctype_public_id(&mut self, s: &str); /// Called after the last [`push_doctype_public_id`] call for a DOCTYPE public identifier. /// /// [`push_doctype_public_id`]: Self::push_doctype_public_id fn terminate_doctype_public_id(&mut self, offset: O) {} /// Assuming the _current token_ is a doctype, set its "system identifier" to the empty string. /// /// If the current token is not a doctype, this method may panic. fn init_doctype_system_id(&mut self, offset: O); /// Assuming the _current token_ is a doctype, append a string to its "system identifier" to the given string. /// /// If the current token is not a doctype, this method may panic. fn push_doctype_system_id(&mut self, s: &str); /// Called after the last [`push_doctype_system_id`] call for a DOCTYPE system identifier. /// /// [`push_doctype_system_id`]: Self::push_doctype_system_id fn terminate_doctype_system_id(&mut self, offset: O) {} /// Assuming the _current token_ is a doctype, set its "force quirks" flag to true. /// /// If the current token is not a doctype, this method pay panic. fn set_force_quirks(&mut self); /// Emit the _current token_, assuming it is a doctype. /// /// If the current token is not a doctype, this method may panic. fn emit_current_doctype(&mut self, offset: O); }