aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMartin Fischer <martin@push-f.com>2023-09-09 21:53:22 +0200
committerMartin Fischer <martin@push-f.com>2023-09-28 10:36:08 +0200
commit5aa3b82fbe62882da8007b0a4548b979c845aa97 (patch)
tree9788640728ea7894a7ff53c561ed10bff3a611c1 /src
parent2c73901944e2d22747a2a4ebcc11881b3f8c2ad3 (diff)
refactor: move machine impl details to machine module
This commit separates the public API (the "Tokenizer") from the internal implementation (the "Machine") to make the code more readable.
Diffstat (limited to 'src')
-rw-r--r--src/naive_parser.rs6
-rw-r--r--src/tokenizer.rs263
-rw-r--r--src/tokenizer/machine.rs90
-rw-r--r--src/tokenizer/machine/utils.rs193
4 files changed, 293 insertions, 259 deletions
diff --git a/src/naive_parser.rs b/src/naive_parser.rs
index 5bf002b..c5e9568 100644
--- a/src/naive_parser.rs
+++ b/src/naive_parser.rs
@@ -35,7 +35,7 @@ impl<R: Reader + Position<O>, O: Offset> NaiveParser<R, O, DefaultEmitter<O>> {
// TODO: add example for NaiveParser::new
pub fn new<'a>(reader: impl IntoReader<'a, Reader = R>) -> Self {
let mut tokenizer = Tokenizer::new(reader, DefaultEmitter::default());
- tokenizer.naively_switch_state = true;
+ tokenizer.enable_naive_state_switching();
NaiveParser { tokenizer }
}
}
@@ -45,7 +45,7 @@ impl<R: Reader + Position<usize>> NaiveParser<R, usize, DefaultEmitter<usize>> {
// TODO: add example for NaiveParser::new_with_spans
pub fn new_with_spans<'a>(reader: impl IntoReader<'a, Reader = R>) -> Self {
let mut tokenizer = Tokenizer::new(reader, DefaultEmitter::default());
- tokenizer.naively_switch_state = true;
+ tokenizer.enable_naive_state_switching();
NaiveParser { tokenizer }
}
}
@@ -55,7 +55,7 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> NaiveParser<R, O, E> {
// TODO: add example for NaiveParser::new_with_emitter
pub fn new_with_emitter<'a>(reader: impl IntoReader<'a, Reader = R>, emitter: E) -> Self {
let mut tokenizer = Tokenizer::new(reader, emitter);
- tokenizer.naively_switch_state = true;
+ tokenizer.enable_naive_state_switching();
NaiveParser { tokenizer }
}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 6f698f6..7c38e49 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1,41 +1,13 @@
-mod machine;
+pub(crate) mod machine;
-use crate::naive_parser::naive_next_state;
use crate::offset::{Offset, Position};
use crate::reader::{IntoReader, Reader};
-use crate::{Emitter, Error};
-use machine::utils::{control_pat, noncharacter_pat, surrogate_pat};
+use crate::Emitter;
use machine::ControlToken;
#[cfg(feature = "integration-tests")]
pub use machine::State as InternalState;
-// this is a stack that can hold 0 to 2 Ts
-#[derive(Debug, Default, Clone, Copy)]
-struct Stack2<T: Copy>(Option<(T, Option<T>)>);
-
-impl<T: Copy> Stack2<T> {
- #[inline]
- fn push(&mut self, c: T) {
- self.0 = match self.0 {
- None => Some((c, None)),
- Some((c1, None)) => Some((c1, Some(c))),
- Some((_c1, Some(_c2))) => panic!("stack full!"),
- }
- }
-
- #[inline]
- fn pop(&mut self) -> Option<T> {
- let (new_self, rv) = match self.0 {
- Some((c1, Some(c2))) => (Some((c1, None)), Some(c2)),
- Some((c1, None)) => (None, Some(c1)),
- None => (None, None),
- };
- self.0 = new_self;
- rv
- }
-}
-
/// An HTML tokenizer.
///
/// # Warning
@@ -56,27 +28,9 @@ impl<T: Copy> Stack2<T> {
///
/// [`NaiveParser`]: crate::NaiveParser
/// [tree construction]: https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
-pub struct Tokenizer<R: Reader, O, E: Emitter<O>> {
+pub struct Tokenizer<R, O, E> {
+ machine: machine::Machine<R, O, E>,
eof: bool,
- pub(crate) state: machine::State,
- pub(crate) emitter: E,
- pub(crate) temporary_buffer: String,
- pub(crate) reader: R,
- to_reconsume: Stack2<Option<char>>,
- pub(crate) character_reference_code: u32,
- pub(crate) return_state: Option<machine::State>,
- current_tag_name: String,
- last_start_tag_name: String,
- is_start_tag: bool,
- /// The reader position before the match block in [`machine::consume`].
- pub(crate) position_before_match: O,
- /// * Set to the offset of `<` in [`machine::State::Data`].
- /// * Set to the offset of `-` in [`machine::State::Comment`].
- /// * Set to the offset of `&` in [`machine::State::CharacterReference`].
- pub(crate) some_offset: O,
- /// This boolean flag exists so that the [`NaiveParser`](crate::NaiveParser) can work with any [`Emitter`]
- /// (it cannot call [`Tokenizer::set_state`] using the emitted start tags since they can be of an arbitrary type).
- pub(crate) naively_switch_state: bool,
}
impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
@@ -88,20 +42,8 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
/// [tree construction]: https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
pub fn new<'a>(reader: impl IntoReader<'a, Reader = R>, emitter: E) -> Self {
Tokenizer {
- reader: reader.into_reader(),
- emitter,
- state: machine::State::Data,
- to_reconsume: Stack2::default(),
- return_state: None,
- temporary_buffer: String::new(),
- character_reference_code: 0,
+ machine: machine::Machine::new(reader.into_reader(), emitter),
eof: false,
- current_tag_name: String::new(),
- last_start_tag_name: String::new(),
- is_start_tag: false,
- position_before_match: O::default(),
- some_offset: O::default(),
- naively_switch_state: false,
}
}
@@ -114,12 +56,12 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
///
/// [Markup declaration open state]: https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
pub fn handle_cdata_open(&mut self, action: CdataAction) {
- machine::handle_cdata_open(self, action);
+ machine::handle_cdata_open(&mut self.machine, action);
}
/// Returns a mutable reference to the emitter.
pub fn emitter_mut(&mut self) -> &mut E {
- &mut self.emitter
+ &mut self.machine.emitter
}
}
@@ -197,185 +139,12 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
/// Only available with the `integration-tests` feature which is not public API.
#[cfg(feature = "integration-tests")]
pub fn set_internal_state(&mut self, state: InternalState) {
- self.state = state;
+ self.machine.state = state;
}
/// Set the statemachine to start/continue in the given state.
pub fn set_state(&mut self, state: State) {
- self.state = state.into();
- }
-
- /// Just a helper method for the machine.
- #[inline]
- pub(crate) fn emit_error(&mut self, error: Error) {
- let span = match error {
- Error::EofBeforeTagName
- | Error::EofInCdata
- | Error::EofInComment
- | Error::EofInDoctype
- | Error::EofInScriptHtmlCommentLikeText
- | Error::EofInTag
- | Error::MissingSemicolonAfterCharacterReference => {
- self.reader.position()..self.reader.position()
- }
- Error::AbsenceOfDigitsInNumericCharacterReference
- | Error::NullCharacterReference
- | Error::CharacterReferenceOutsideUnicodeRange
- | Error::SurrogateCharacterReference
- | Error::NoncharacterCharacterReference
- | Error::ControlCharacterReference
- | Error::UnknownNamedCharacterReference => self.some_offset..self.reader.position(),
-
- _ => self.position_before_match..self.reader.position(),
- };
- self.emitter.report_error(error, span);
- }
-
- /// Assuming the _current token_ is an end tag, return true if all of these hold. Return false otherwise.
- ///
- /// * the _last start tag_ exists
- /// * the current end tag token's name equals to the last start tag's name.
- ///
- /// See also WHATWG's definition of [appropriate end tag token].
- ///
- /// [appropriate end tag token]: https://html.spec.whatwg.org/multipage/parsing.html#appropriate-end-tag-token
- #[inline]
- pub(crate) fn current_end_tag_is_appropriate(&mut self) -> bool {
- self.current_tag_name == self.last_start_tag_name
- }
-
- #[inline]
- pub(crate) fn init_start_tag(&mut self) {
- self.emitter
- .init_start_tag(self.some_offset, self.position_before_match);
- self.current_tag_name.clear();
- self.is_start_tag = true;
- }
-
- #[inline]
- pub(crate) fn init_end_tag(&mut self) {
- self.emitter
- .init_end_tag(self.some_offset, self.position_before_match);
- self.current_tag_name.clear();
- self.is_start_tag = false;
- }
-
- #[inline]
- pub(crate) fn push_tag_name(&mut self, s: &str) {
- self.emitter.push_tag_name(s);
- self.current_tag_name.push_str(s);
- }
-
- #[inline]
- pub(crate) fn emit_current_tag(&mut self) {
- self.emitter.emit_current_tag(self.reader.position());
- if self.is_start_tag {
- if self.naively_switch_state {
- self.state = naive_next_state(&self.current_tag_name).into();
- }
- std::mem::swap(&mut self.last_start_tag_name, &mut self.current_tag_name);
- }
- }
-
- #[inline]
- pub(crate) fn unread_char(&mut self, c: Option<char>) {
- self.to_reconsume.push(c);
- }
-
- #[inline]
- fn validate_char(&mut self, c: char) {
- match c as u32 {
- surrogate_pat!() => {
- self.emit_error(Error::SurrogateInInputStream);
- }
- noncharacter_pat!() => {
- self.emit_error(Error::NoncharacterInInputStream);
- }
- // control without whitespace or nul
- x @ control_pat!()
- if !matches!(x, 0x0000 | 0x0009 | 0x000a | 0x000c | 0x000d | 0x0020) =>
- {
- self.emit_error(Error::ControlCharacterInInputStream);
- }
- _ => (),
- }
- }
-
- pub(crate) fn read_char(&mut self) -> Result<Option<char>, R::Error> {
- let (c_res, reconsumed) = match self.to_reconsume.pop() {
- Some(c) => (Ok(c), true),
- None => (self.reader.read_char(), false),
- };
-
- let mut c = match c_res {
- Ok(Some(c)) => c,
- res => return res,
- };
-
- if c == '\r' {
- c = '\n';
- let c2 = self.reader.read_char()?;
- if c2 != Some('\n') {
- self.unread_char(c2);
- }
- }
-
- if !reconsumed {
- self.validate_char(c);
- }
-
- Ok(Some(c))
- }
-
- #[inline]
- pub(crate) fn try_read_string(
- &mut self,
- mut s: &str,
- case_sensitive: bool,
- ) -> Result<bool, R::Error> {
- debug_assert!(!s.is_empty());
-
- let to_reconsume_bak = self.to_reconsume;
- let mut chars = s.chars();
- while let Some(c) = self.to_reconsume.pop() {
- if let (Some(x), Some(x2)) = (c, chars.next()) {
- if x == x2 || (!case_sensitive && x.to_ascii_lowercase() == x2.to_ascii_lowercase())
- {
- s = &s[x.len_utf8()..];
- continue;
- }
- }
-
- self.to_reconsume = to_reconsume_bak;
- return Ok(false);
- }
-
- self.reader.try_read_string(s, case_sensitive)
- }
-
- pub(crate) fn is_consumed_as_part_of_an_attribute(&self) -> bool {
- matches!(
- self.return_state,
- Some(
- machine::State::AttributeValueDoubleQuoted
- | machine::State::AttributeValueSingleQuoted
- | machine::State::AttributeValueUnquoted
- )
- )
- }
-
- pub(crate) fn flush_code_points_consumed_as_character_reference(&mut self) {
- if self.is_consumed_as_part_of_an_attribute() {
- self.emitter.push_attribute_value(&self.temporary_buffer);
- self.temporary_buffer.clear();
- } else {
- self.flush_buffer_characters();
- }
- }
-
- pub(crate) fn flush_buffer_characters(&mut self) {
- self.emitter.emit_string(&self.temporary_buffer);
- self.temporary_buffer.clear();
+ self.machine.state = state.into();
}
}
@@ -389,7 +158,7 @@ where
fn next(&mut self) -> Option<Self::Item> {
loop {
- if let Some(token) = self.emitter.next() {
+ if let Some(token) = self.machine.emitter.next() {
return Some(Ok(Event::Token(token)));
}
@@ -397,12 +166,12 @@ where
return None;
}
- match machine::consume(self) {
+ match machine::consume(&mut self.machine) {
Err(e) => return Some(Err(e)),
Ok(ControlToken::Continue) => (),
Ok(ControlToken::Eof) => {
self.eof = true;
- self.emitter.emit_eof();
+ self.machine.emitter.emit_eof();
}
Ok(ControlToken::CdataOpen) => return Some(Ok(Event::CdataOpen)),
}
@@ -411,12 +180,16 @@ where
}
impl<R: Reader, O, E: Emitter<O>> Tokenizer<R, O, E> {
+ pub(crate) fn enable_naive_state_switching(&mut self) {
+ self.machine.naively_switch_state = true;
+ }
+
/// Test-internal function to override internal state.
///
/// Only available with the `integration-tests` feature which is not public API.
#[cfg(feature = "integration-tests")]
pub fn set_last_start_tag(&mut self, last_start_tag: &str) {
- self.last_start_tag_name.clear();
- self.last_start_tag_name.push_str(last_start_tag);
+ self.machine.last_start_tag_name.clear();
+ self.machine.last_start_tag_name.push_str(last_start_tag);
}
}
diff --git a/src/tokenizer/machine.rs b/src/tokenizer/machine.rs
index fc31a42..e9a3e68 100644
--- a/src/tokenizer/machine.rs
+++ b/src/tokenizer/machine.rs
@@ -1,16 +1,65 @@
-pub(super) mod utils;
+mod utils;
use crate::entities::try_read_character_reference;
use crate::offset::{Offset, Position};
use crate::token::AttrValueSyntax;
use crate::tokenizer::CdataAction;
-use crate::{reader::Reader, Emitter, Error, Tokenizer};
+use crate::{reader::Reader, Emitter, Error};
use utils::{
ascii_digit_pat, control_pat, ctostr, noncharacter_pat, surrogate_pat, whitespace_pat,
};
pub use utils::State;
+pub(super) struct Machine<R, O, E> {
+ pub(super) state: State,
+ pub(super) emitter: E,
+ temporary_buffer: String,
+ reader: R,
+ to_reconsume: Stack2<Option<char>>,
+ character_reference_code: u32,
+ return_state: Option<State>,
+ current_tag_name: String,
+ pub(super) last_start_tag_name: String,
+ is_start_tag: bool,
+ /// The reader position before the match block in [`consume`].
+ position_before_match: O,
+ /// * Set to the offset of `<` in [`State::Data`].
+ /// * Set to the offset of `-` in [`State::Comment`].
+ /// * Set to the offset of `&` in [`State::CharacterReference`].
+ some_offset: O,
+ /// This boolean flag exists so that the [`NaiveParser`](crate::NaiveParser) can work with any [`Emitter`]
+ /// (it cannot call [`Tokenizer::set_state`] using the emitted start tags since they can be of an arbitrary type).
+ ///
+ /// [`Tokenizer::set_state`]: super::Tokenizer::set_state
+ pub(crate) naively_switch_state: bool,
+}
+
+impl<R, O, E> Machine<R, O, E>
+where
+ R: Reader + Position<O>,
+ O: Offset,
+ E: Emitter<O>,
+{
+ pub fn new(reader: R, emitter: E) -> Self {
+ Self {
+ reader,
+ emitter,
+ state: State::Data,
+ to_reconsume: Stack2::default(),
+ return_state: None,
+ temporary_buffer: String::new(),
+ character_reference_code: 0,
+ current_tag_name: String::new(),
+ last_start_tag_name: String::new(),
+ is_start_tag: false,
+ position_before_match: O::default(),
+ some_offset: O::default(),
+ naively_switch_state: false,
+ }
+ }
+}
+
pub enum ControlToken {
Eof,
Continue,
@@ -18,7 +67,7 @@ pub enum ControlToken {
}
#[inline]
-pub fn consume<O, R, E>(slf: &mut Tokenizer<R, O, E>) -> Result<ControlToken, R::Error>
+pub(super) fn consume<O, R, E>(slf: &mut Machine<R, O, E>) -> Result<ControlToken, R::Error>
where
O: Offset,
R: Reader + Position<O>,
@@ -1964,15 +2013,8 @@ where
}
}
-impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
- #[inline]
- fn init_doctype(&mut self) {
- self.emitter.init_doctype(self.some_offset);
- }
-}
-
#[inline]
-pub fn handle_cdata_open<O, R, E>(slf: &mut Tokenizer<R, O, E>, action: CdataAction)
+pub(super) fn handle_cdata_open<O, R, E>(slf: &mut Machine<R, O, E>, action: CdataAction)
where
O: Offset,
R: Reader + Position<O>,
@@ -1989,3 +2031,29 @@ where
}
}
}
+
+// this is a stack that can hold 0 to 2 Ts
+#[derive(Debug, Default, Clone, Copy)]
+struct Stack2<T: Copy>(Option<(T, Option<T>)>);
+
+impl<T: Copy> Stack2<T> {
+ #[inline]
+ fn push(&mut self, c: T) {
+ self.0 = match self.0 {
+ None => Some((c, None)),
+ Some((c1, None)) => Some((c1, Some(c))),
+ Some((_c1, Some(_c2))) => panic!("stack full!"),
+ }
+ }
+
+ #[inline]
+ fn pop(&mut self) -> Option<T> {
+ let (new_self, rv) = match self.0 {
+ Some((c1, Some(c2))) => (Some((c1, None)), Some(c2)),
+ Some((c1, None)) => (None, Some(c1)),
+ None => (None, None),
+ };
+ self.0 = new_self;
+ rv
+ }
+}
diff --git a/src/tokenizer/machine/utils.rs b/src/tokenizer/machine/utils.rs
index 7d220cf..6e45f4d 100644
--- a/src/tokenizer/machine/utils.rs
+++ b/src/tokenizer/machine/utils.rs
@@ -1,3 +1,196 @@
+use crate::{
+ naive_parser::naive_next_state,
+ offset::{Offset, Position},
+ reader::Reader,
+ Emitter, Error,
+};
+
+use super::Machine;
+
+impl<R, O, E> Machine<R, O, E>
+where
+ R: Reader + Position<O>,
+ O: Offset,
+ E: Emitter<O>,
+{
+ #[inline]
+ pub(crate) fn emit_error(&mut self, error: Error) {
+ let span = match error {
+ Error::EofBeforeTagName
+ | Error::EofInCdata
+ | Error::EofInComment
+ | Error::EofInDoctype
+ | Error::EofInScriptHtmlCommentLikeText
+ | Error::EofInTag
+ | Error::MissingSemicolonAfterCharacterReference => {
+ self.reader.position()..self.reader.position()
+ }
+ Error::AbsenceOfDigitsInNumericCharacterReference
+ | Error::NullCharacterReference
+ | Error::CharacterReferenceOutsideUnicodeRange
+ | Error::SurrogateCharacterReference
+ | Error::NoncharacterCharacterReference
+ | Error::ControlCharacterReference
+ | Error::UnknownNamedCharacterReference => self.some_offset..self.reader.position(),
+
+ _ => self.position_before_match..self.reader.position(),
+ };
+ self.emitter.report_error(error, span);
+ }
+
+ /// Assuming the _current token_ is an end tag, return true if all of these hold. Return false otherwise.
+ ///
+ /// * the _last start tag_ exists
+ /// * the current end tag token's name equals to the last start tag's name.
+ ///
+ /// See also WHATWG's definition of [appropriate end tag token].
+ ///
+ /// [appropriate end tag token]: https://html.spec.whatwg.org/multipage/parsing.html#appropriate-end-tag-token
+ #[inline]
+ pub(super) fn current_end_tag_is_appropriate(&mut self) -> bool {
+ self.current_tag_name == self.last_start_tag_name
+ }
+
+ #[inline]
+ pub(super) fn init_start_tag(&mut self) {
+ self.emitter
+ .init_start_tag(self.some_offset, self.position_before_match);
+ self.current_tag_name.clear();
+ self.is_start_tag = true;
+ }
+
+ #[inline]
+ pub(super) fn init_end_tag(&mut self) {
+ self.emitter
+ .init_end_tag(self.some_offset, self.position_before_match);
+ self.current_tag_name.clear();
+ self.is_start_tag = false;
+ }
+
+ #[inline]
+ pub(super) fn init_doctype(&mut self) {
+ self.emitter.init_doctype(self.some_offset);
+ }
+
+ #[inline]
+ pub(super) fn push_tag_name(&mut self, s: &str) {
+ self.emitter.push_tag_name(s);
+ self.current_tag_name.push_str(s);
+ }
+
+ #[inline]
+ pub(super) fn emit_current_tag(&mut self) {
+ self.emitter.emit_current_tag(self.reader.position());
+ if self.is_start_tag {
+ if self.naively_switch_state {
+ self.state = naive_next_state(&self.current_tag_name).into();
+ }
+ std::mem::swap(&mut self.last_start_tag_name, &mut self.current_tag_name);
+ }
+ }
+
+ #[inline]
+ pub(super) fn unread_char(&mut self, c: Option<char>) {
+ self.to_reconsume.push(c);
+ }
+
+ #[inline]
+ fn validate_char(&mut self, c: char) {
+ match c as u32 {
+ surrogate_pat!() => {
+ self.emit_error(Error::SurrogateInInputStream);
+ }
+ noncharacter_pat!() => {
+ self.emit_error(Error::NoncharacterInInputStream);
+ }
+ // control without whitespace or nul
+ x @ control_pat!()
+ if !matches!(x, 0x0000 | 0x0009 | 0x000a | 0x000c | 0x000d | 0x0020) =>
+ {
+ self.emit_error(Error::ControlCharacterInInputStream);
+ }
+ _ => (),
+ }
+ }
+
+ pub(super) fn read_char(&mut self) -> Result<Option<char>, R::Error> {
+ let (c_res, reconsumed) = match self.to_reconsume.pop() {
+ Some(c) => (Ok(c), true),
+ None => (self.reader.read_char(), false),
+ };
+
+ let mut c = match c_res {
+ Ok(Some(c)) => c,
+ res => return res,
+ };
+
+ if c == '\r' {
+ c = '\n';
+ let c2 = self.reader.read_char()?;
+ if c2 != Some('\n') {
+ self.unread_char(c2);
+ }
+ }
+
+ if !reconsumed {
+ self.validate_char(c);
+ }
+
+ Ok(Some(c))
+ }
+
+ #[inline]
+ pub(super) fn try_read_string(
+ &mut self,
+ mut s: &str,
+ case_sensitive: bool,
+ ) -> Result<bool, R::Error> {
+ debug_assert!(!s.is_empty());
+
+ let to_reconsume_bak = self.to_reconsume;
+ let mut chars = s.chars();
+ while let Some(c) = self.to_reconsume.pop() {
+ if let (Some(x), Some(x2)) = (c, chars.next()) {
+ if x == x2 || (!case_sensitive && x.to_ascii_lowercase() == x2.to_ascii_lowercase())
+ {
+ s = &s[x.len_utf8()..];
+ continue;
+ }
+ }
+
+ self.to_reconsume = to_reconsume_bak;
+ return Ok(false);
+ }
+
+ self.reader.try_read_string(s, case_sensitive)
+ }
+
+ pub(super) fn is_consumed_as_part_of_an_attribute(&self) -> bool {
+ matches!(
+ self.return_state,
+ Some(
+ State::AttributeValueDoubleQuoted
+ | State::AttributeValueSingleQuoted
+ | State::AttributeValueUnquoted
+ )
+ )
+ }
+
+ pub(super) fn flush_code_points_consumed_as_character_reference(&mut self) {
+ if self.is_consumed_as_part_of_an_attribute() {
+ self.emitter.push_attribute_value(&self.temporary_buffer);
+ self.temporary_buffer.clear();
+ } else {
+ self.flush_buffer_characters();
+ }
+ }
+
+ pub(super) fn flush_buffer_characters(&mut self) {
+ self.emitter.emit_string(&self.temporary_buffer);
+ self.temporary_buffer.clear();
+ }
+}
+
macro_rules! surrogate_pat {
() => {
0xd800..=0xdfff