diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/tokenizer/machine.rs | 124 | ||||
-rw-r--r-- | src/tokenizer/machine/utils.rs | 18 |
2 files changed, 80 insertions, 62 deletions
diff --git a/src/tokenizer/machine.rs b/src/tokenizer/machine.rs index e9a3e68..8b09aa7 100644 --- a/src/tokenizer/machine.rs +++ b/src/tokenizer/machine.rs @@ -105,11 +105,11 @@ where } Some('\0') => { slf.emit_error(Error::UnexpectedNullCharacter); - slf.emitter.emit_string("\0"); + slf.emit_char('\0'); Ok(ControlToken::Continue) } Some(x) => { - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } None => Ok(ControlToken::Eof), @@ -126,11 +126,11 @@ where } Some('\0') => { slf.emit_error(Error::UnexpectedNullCharacter); - slf.emitter.emit_string("\u{fffd}"); + slf.emit_char('\u{fffd}'); Ok(ControlToken::Continue) } Some(x) => { - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } None => Ok(ControlToken::Eof), @@ -142,11 +142,11 @@ where } Some('\0') => { slf.emit_error(Error::UnexpectedNullCharacter); - slf.emitter.emit_string("\u{fffd}"); + slf.emit_char('\u{fffd}'); Ok(ControlToken::Continue) } Some(x) => { - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } None => Ok(ControlToken::Eof), @@ -158,11 +158,11 @@ where } Some('\0') => { slf.emit_error(Error::UnexpectedNullCharacter); - slf.emitter.emit_string("\u{fffd}"); + slf.emit_char('\u{fffd}'); Ok(ControlToken::Continue) } Some(x) => { - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } None => Ok(ControlToken::Eof), @@ -170,11 +170,11 @@ where State::PlainText => match slf.read_char()? { Some('\0') => { slf.emit_error(Error::UnexpectedNullCharacter); - slf.emitter.emit_string("\u{fffd}"); + slf.emit_char('\u{fffd}'); Ok(ControlToken::Continue) } Some(x) => { - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } None => Ok(ControlToken::Eof), @@ -203,13 +203,13 @@ where } None => { slf.emit_error(Error::EofBeforeTagName); - slf.emitter.emit_string("<"); + slf.emit_char('<'); Ok(ControlToken::Eof) } c @ Some(_) => { slf.emit_error(Error::InvalidFirstCharacterOfTagName); slf.state = State::Data; - slf.emitter.emit_string("<"); + slf.emit_char('<'); slf.unread_char(c); Ok(ControlToken::Continue) } @@ -228,7 +228,7 @@ where } None => { slf.emit_error(Error::EofBeforeTagName); - slf.emitter.emit_string("</"); + slf.emit_chars(b"</"); Ok(ControlToken::Eof) } Some(x) => { @@ -277,7 +277,7 @@ where Ok(ControlToken::Continue) } c => { - slf.emitter.emit_string("<"); + slf.emit_char('<'); slf.state = State::RcData; slf.unread_char(c); Ok(ControlToken::Continue) @@ -291,7 +291,7 @@ where Ok(ControlToken::Continue) } c => { - slf.emitter.emit_string("</"); + slf.emit_chars(b"</"); slf.state = State::RcData; slf.unread_char(c); Ok(ControlToken::Continue) @@ -317,7 +317,7 @@ where Ok(ControlToken::Continue) } c => { - slf.emitter.emit_string("</"); + slf.emit_chars(b"</"); slf.flush_buffer_characters(); slf.state = State::RcData; @@ -332,7 +332,7 @@ where Ok(ControlToken::Continue) } c => { - slf.emitter.emit_string("<"); + slf.emit_char('<'); slf.state = State::RawText; slf.unread_char(c); Ok(ControlToken::Continue) @@ -346,7 +346,7 @@ where Ok(ControlToken::Continue) } c => { - slf.emitter.emit_string("</"); + slf.emit_chars(b"</"); slf.state = State::RawText; slf.unread_char(c); Ok(ControlToken::Continue) @@ -372,7 +372,7 @@ where Ok(ControlToken::Continue) } c => { - slf.emitter.emit_string("</"); + slf.emit_chars(b"</"); slf.flush_buffer_characters(); slf.state = State::RawText; @@ -388,11 +388,11 @@ where } Some('!') => { slf.state = State::ScriptDataEscapeStart; - slf.emitter.emit_string("<!"); + slf.emit_chars(b"<!"); Ok(ControlToken::Continue) } c => { - slf.emitter.emit_string("<"); + slf.emit_char('<'); slf.state = State::ScriptData; slf.unread_char(c); Ok(ControlToken::Continue) @@ -406,7 +406,7 @@ where Ok(ControlToken::Continue) } c => { - slf.emitter.emit_string("</"); + slf.emit_chars(b"</"); slf.state = State::ScriptData; slf.unread_char(c); Ok(ControlToken::Continue) @@ -432,7 +432,7 @@ where Ok(ControlToken::Continue) } c => { - slf.emitter.emit_string("</"); + slf.emit_chars(b"</"); slf.flush_buffer_characters(); slf.state = State::Data; slf.unread_char(c); @@ -442,7 +442,7 @@ where State::ScriptDataEscapeStart => match slf.read_char()? { Some('-') => { slf.state = State::ScriptDataEscapeStartDash; - slf.emitter.emit_string("-"); + slf.emit_char('-'); Ok(ControlToken::Continue) } c => { @@ -454,7 +454,7 @@ where State::ScriptDataEscapeStartDash => match slf.read_char()? { Some('-') => { slf.state = State::ScriptDataEscapedDashDash; - slf.emitter.emit_string("-"); + slf.emit_char('-'); Ok(ControlToken::Continue) } c => { @@ -466,7 +466,7 @@ where State::ScriptDataEscaped => match slf.read_char()? { Some('-') => { slf.state = State::ScriptDataEscapedDash; - slf.emitter.emit_string("-"); + slf.emit_char('-'); Ok(ControlToken::Continue) } Some('<') => { @@ -475,7 +475,7 @@ where } Some('\0') => { slf.emit_error(Error::UnexpectedNullCharacter); - slf.emitter.emit_string("\u{fffd}"); + slf.emit_char('\u{fffd}'); Ok(ControlToken::Continue) } None => { @@ -483,14 +483,14 @@ where Ok(ControlToken::Eof) } Some(x) => { - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } }, State::ScriptDataEscapedDash => match slf.read_char()? { Some('-') => { slf.state = State::ScriptDataEscapedDashDash; - slf.emitter.emit_string("-"); + slf.emit_char('-'); Ok(ControlToken::Continue) } Some('<') => { @@ -500,7 +500,7 @@ where Some('\0') => { slf.emit_error(Error::UnexpectedNullCharacter); slf.state = State::ScriptDataEscaped; - slf.emitter.emit_string("\u{fffd}"); + slf.emit_char('\u{fffd}'); Ok(ControlToken::Continue) } None => { @@ -509,13 +509,13 @@ where } Some(x) => { slf.state = State::ScriptDataEscaped; - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } }, State::ScriptDataEscapedDashDash => match slf.read_char()? { Some('-') => { - slf.emitter.emit_string("-"); + slf.emit_char('-'); Ok(ControlToken::Continue) } Some('<') => { @@ -524,13 +524,13 @@ where } Some('>') => { slf.state = State::ScriptData; - slf.emitter.emit_string(">"); + slf.emit_char('>'); Ok(ControlToken::Continue) } Some('\0') => { slf.emit_error(Error::UnexpectedNullCharacter); slf.state = State::ScriptDataEscaped; - slf.emitter.emit_string("\u{fffd}"); + slf.emit_char('\u{fffd}'); Ok(ControlToken::Continue) } None => { @@ -539,7 +539,7 @@ where } Some(x) => { slf.state = State::ScriptDataEscaped; - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } }, @@ -551,13 +551,13 @@ where } Some(x) if x.is_ascii_alphabetic() => { slf.temporary_buffer.clear(); - slf.emitter.emit_string("<"); + slf.emit_char('<'); slf.state = State::ScriptDataDoubleEscapeStart; slf.unread_char(Some(x)); Ok(ControlToken::Continue) } c => { - slf.emitter.emit_string("<"); + slf.emit_char('<'); slf.state = State::ScriptDataEscaped; slf.unread_char(c); Ok(ControlToken::Continue) @@ -571,7 +571,7 @@ where Ok(ControlToken::Continue) } c => { - slf.emitter.emit_string("</"); + slf.emit_chars(b"</"); slf.unread_char(c); slf.state = State::ScriptDataEscaped; Ok(ControlToken::Continue) @@ -597,7 +597,7 @@ where Ok(ControlToken::Continue) } c => { - slf.emitter.emit_string("</"); + slf.emit_chars(b"</"); slf.flush_buffer_characters(); slf.state = State::ScriptDataEscaped; slf.unread_char(c); @@ -611,12 +611,12 @@ where } else { slf.state = State::ScriptDataEscaped; } - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } Some(x) if x.is_ascii_alphabetic() => { slf.temporary_buffer.push(x.to_ascii_lowercase()); - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } c => { @@ -628,17 +628,17 @@ where State::ScriptDataDoubleEscaped => match slf.read_char()? { Some('-') => { slf.state = State::ScriptDataDoubleEscapedDash; - slf.emitter.emit_string("-"); + slf.emit_char('-'); Ok(ControlToken::Continue) } Some('<') => { slf.state = State::ScriptDataDoubleEscapedLessThanSign; - slf.emitter.emit_string("<"); + slf.emit_char('<'); Ok(ControlToken::Continue) } Some('\0') => { slf.emit_error(Error::UnexpectedNullCharacter); - slf.emitter.emit_string("\u{fffd}"); + slf.emit_char('\u{fffd}'); Ok(ControlToken::Continue) } None => { @@ -646,25 +646,25 @@ where Ok(ControlToken::Eof) } Some(x) => { - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } }, State::ScriptDataDoubleEscapedDash => match slf.read_char()? { Some('-') => { slf.state = State::ScriptDataDoubleEscapedDashDash; - slf.emitter.emit_string("-"); + slf.emit_char('-'); Ok(ControlToken::Continue) } Some('<') => { slf.state = State::ScriptDataDoubleEscapedLessThanSign; - slf.emitter.emit_string("<"); + slf.emit_char('<'); Ok(ControlToken::Continue) } Some('\0') => { slf.emit_error(Error::UnexpectedNullCharacter); slf.state = State::ScriptDataDoubleEscaped; - slf.emitter.emit_string("\u{fffd}"); + slf.emit_char('\u{fffd}'); Ok(ControlToken::Continue) } None => { @@ -673,29 +673,29 @@ where } Some(x) => { slf.state = State::ScriptDataDoubleEscaped; - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } }, State::ScriptDataDoubleEscapedDashDash => match slf.read_char()? { Some('-') => { - slf.emitter.emit_string("-"); + slf.emit_char('-'); Ok(ControlToken::Continue) } Some('<') => { - slf.emitter.emit_string("<"); + slf.emit_char('<'); slf.state = State::ScriptDataDoubleEscapedLessThanSign; Ok(ControlToken::Continue) } Some('>') => { - slf.emitter.emit_string(">"); + slf.emit_char('>'); slf.state = State::ScriptData; Ok(ControlToken::Continue) } Some('\0') => { slf.emit_error(Error::UnexpectedNullCharacter); slf.state = State::ScriptDataDoubleEscaped; - slf.emitter.emit_string("\u{fffd}"); + slf.emit_char('\u{fffd}'); Ok(ControlToken::Continue) } None => { @@ -704,7 +704,7 @@ where } Some(x) => { slf.state = State::ScriptDataDoubleEscaped; - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } }, @@ -712,7 +712,7 @@ where Some('/') => { slf.temporary_buffer.clear(); slf.state = State::ScriptDataDoubleEscapeEnd; - slf.emitter.emit_string("/"); + slf.emit_char('/'); Ok(ControlToken::Continue) } c => { @@ -729,12 +729,12 @@ where slf.state = State::ScriptDataDoubleEscaped; } - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } Some(x) if x.is_ascii_alphabetic() => { slf.temporary_buffer.push(x.to_ascii_lowercase()); - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } c => { @@ -1741,7 +1741,7 @@ where Ok(ControlToken::Eof) } Some(x) => { - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); Ok(ControlToken::Continue) } }, @@ -1751,7 +1751,7 @@ where Ok(ControlToken::Continue) } c => { - slf.emitter.emit_string("]"); + slf.emit_char(']'); slf.state = State::CdataSection; slf.unread_char(c); Ok(ControlToken::Continue) @@ -1759,7 +1759,7 @@ where }, State::CdataSectionEnd => match slf.read_char()? { Some(']') => { - slf.emitter.emit_string("]"); + slf.emit_char(']'); Ok(ControlToken::Continue) } Some('>') => { @@ -1767,7 +1767,7 @@ where Ok(ControlToken::Continue) } c => { - slf.emitter.emit_string("]]"); + slf.emit_chars(b"]]"); slf.unread_char(c); slf.state = State::CdataSection; Ok(ControlToken::Continue) @@ -1846,7 +1846,7 @@ where if slf.is_consumed_as_part_of_an_attribute() { slf.emitter.push_attribute_value(ctostr!(x)); } else { - slf.emitter.emit_string(ctostr!(x)); + slf.emit_char(x); } Ok(ControlToken::Continue) diff --git a/src/tokenizer/machine/utils.rs b/src/tokenizer/machine/utils.rs index 6e45f4d..d96e50b 100644 --- a/src/tokenizer/machine/utils.rs +++ b/src/tokenizer/machine/utils.rs @@ -13,6 +13,24 @@ where O: Offset, E: Emitter<O>, { + /// Emits the given character as a character token. + #[inline] + pub(super) fn emit_char(&mut self, c: char) { + self.emitter.emit_string(ctostr!(c)); + } + + /// Emits every byte of the given byte slice as a character token. + /// + /// (We're operating on bytes to enable compiler optimization, + /// since [`str::chars`] isn't `const`.) + #[inline] + pub(super) fn emit_chars(&mut self, s: &[u8]) { + self.emitter.emit_string( + // this unsafe block is only temporary and will be removed in the next commit + unsafe { std::str::from_utf8_unchecked(s) }, + ); + } + #[inline] pub(crate) fn emit_error(&mut self, error: Error) { let span = match error { |