diff options
| | | |
|---|---|---|
| author | Martin Fischer <martin@push-f.com> | 2023-09-09 22:15:54 +0200 |
| committer | Martin Fischer <martin@push-f.com> | 2023-09-28 10:36:08 +0200 |
| commit | 394c52260e861e911e2d8706d4904136a920da87 (patch) | |
| tree | b7511677c6f6a0ca03526991a770ef0b6752e963 /src/tokenizer | |
| parent | 5aa3b82fbe62882da8007b0a4548b979c845aa97 (diff) | |
refactor: proxy emit_string calls through utils
This is done separately so that the next commit has a cleaner diff.
Diffstat (limited to 'src/tokenizer')
| -rw-r--r-- | src/tokenizer/machine.rs | 124 | ||||
| -rw-r--r-- | src/tokenizer/machine/utils.rs | 18 | 
2 files changed, 80 insertions, 62 deletions
| diff --git a/src/tokenizer/machine.rs b/src/tokenizer/machine.rs index e9a3e68..8b09aa7 100644 --- a/src/tokenizer/machine.rs +++ b/src/tokenizer/machine.rs @@ -105,11 +105,11 @@ where              }              Some('\0') => {                  slf.emit_error(Error::UnexpectedNullCharacter); -                slf.emitter.emit_string("\0"); +                slf.emit_char('\0');                  Ok(ControlToken::Continue)              }              Some(x) => { -                slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }              None => Ok(ControlToken::Eof), @@ -126,11 +126,11 @@ where              }              Some('\0') => {                  slf.emit_error(Error::UnexpectedNullCharacter); -                slf.emitter.emit_string("\u{fffd}"); +                slf.emit_char('\u{fffd}');                  Ok(ControlToken::Continue)              }              Some(x) => { -                slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }              None => Ok(ControlToken::Eof), @@ -142,11 +142,11 @@ where              }              Some('\0') => {                  slf.emit_error(Error::UnexpectedNullCharacter); -                slf.emitter.emit_string("\u{fffd}"); +                slf.emit_char('\u{fffd}');                  Ok(ControlToken::Continue)              }              Some(x) => { -                slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }              None => Ok(ControlToken::Eof), @@ -158,11 +158,11 @@ where              }              Some('\0') => {                  slf.emit_error(Error::UnexpectedNullCharacter); -                slf.emitter.emit_string("\u{fffd}"); +                slf.emit_char('\u{fffd}');                  Ok(ControlToken::Continue)              }              Some(x) 
=> { -                slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }              None => Ok(ControlToken::Eof), @@ -170,11 +170,11 @@ where          State::PlainText => match slf.read_char()? {              Some('\0') => {                  slf.emit_error(Error::UnexpectedNullCharacter); -                slf.emitter.emit_string("\u{fffd}"); +                slf.emit_char('\u{fffd}');                  Ok(ControlToken::Continue)              }              Some(x) => { -                slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }              None => Ok(ControlToken::Eof), @@ -203,13 +203,13 @@ where              }              None => {                  slf.emit_error(Error::EofBeforeTagName); -                slf.emitter.emit_string("<"); +                slf.emit_char('<');                  Ok(ControlToken::Eof)              }              c @ Some(_) => {                  slf.emit_error(Error::InvalidFirstCharacterOfTagName);                  slf.state = State::Data; -                slf.emitter.emit_string("<"); +                slf.emit_char('<');                  slf.unread_char(c);                  Ok(ControlToken::Continue)              } @@ -228,7 +228,7 @@ where              }              None => {                  slf.emit_error(Error::EofBeforeTagName); -                slf.emitter.emit_string("</"); +                slf.emit_chars(b"</");                  Ok(ControlToken::Eof)              }              Some(x) => { @@ -277,7 +277,7 @@ where                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_string("<"); +                slf.emit_char('<');                  slf.state = State::RcData;                  slf.unread_char(c);                  Ok(ControlToken::Continue) @@ -291,7 +291,7 @@ where                  
Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_string("</"); +                slf.emit_chars(b"</");                  slf.state = State::RcData;                  slf.unread_char(c);                  Ok(ControlToken::Continue) @@ -317,7 +317,7 @@ where                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_string("</"); +                slf.emit_chars(b"</");                  slf.flush_buffer_characters();                  slf.state = State::RcData; @@ -332,7 +332,7 @@ where                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_string("<"); +                slf.emit_char('<');                  slf.state = State::RawText;                  slf.unread_char(c);                  Ok(ControlToken::Continue) @@ -346,7 +346,7 @@ where                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_string("</"); +                slf.emit_chars(b"</");                  slf.state = State::RawText;                  slf.unread_char(c);                  Ok(ControlToken::Continue) @@ -372,7 +372,7 @@ where                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_string("</"); +                slf.emit_chars(b"</");                  slf.flush_buffer_characters();                  slf.state = State::RawText; @@ -388,11 +388,11 @@ where              }              Some('!') => {                  slf.state = State::ScriptDataEscapeStart; -                slf.emitter.emit_string("<!"); +                slf.emit_chars(b"<!");                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_string("<"); +                slf.emit_char('<');                  slf.state = State::ScriptData;                  slf.unread_char(c);                  Ok(ControlToken::Continue) @@ -406,7 
+406,7 @@ where                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_string("</"); +                slf.emit_chars(b"</");                  slf.state = State::ScriptData;                  slf.unread_char(c);                  Ok(ControlToken::Continue) @@ -432,7 +432,7 @@ where                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_string("</"); +                slf.emit_chars(b"</");                  slf.flush_buffer_characters();                  slf.state = State::Data;                  slf.unread_char(c); @@ -442,7 +442,7 @@ where          State::ScriptDataEscapeStart => match slf.read_char()? {              Some('-') => {                  slf.state = State::ScriptDataEscapeStartDash; -                slf.emitter.emit_string("-"); +                slf.emit_char('-');                  Ok(ControlToken::Continue)              }              c => { @@ -454,7 +454,7 @@ where          State::ScriptDataEscapeStartDash => match slf.read_char()? {              Some('-') => {                  slf.state = State::ScriptDataEscapedDashDash; -                slf.emitter.emit_string("-"); +                slf.emit_char('-');                  Ok(ControlToken::Continue)              }              c => { @@ -466,7 +466,7 @@ where          State::ScriptDataEscaped => match slf.read_char()? 
{              Some('-') => {                  slf.state = State::ScriptDataEscapedDash; -                slf.emitter.emit_string("-"); +                slf.emit_char('-');                  Ok(ControlToken::Continue)              }              Some('<') => { @@ -475,7 +475,7 @@ where              }              Some('\0') => {                  slf.emit_error(Error::UnexpectedNullCharacter); -                slf.emitter.emit_string("\u{fffd}"); +                slf.emit_char('\u{fffd}');                  Ok(ControlToken::Continue)              }              None => { @@ -483,14 +483,14 @@ where                  Ok(ControlToken::Eof)              }              Some(x) => { -                slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }          },          State::ScriptDataEscapedDash => match slf.read_char()? {              Some('-') => {                  slf.state = State::ScriptDataEscapedDashDash; -                slf.emitter.emit_string("-"); +                slf.emit_char('-');                  Ok(ControlToken::Continue)              }              Some('<') => { @@ -500,7 +500,7 @@ where              Some('\0') => {                  slf.emit_error(Error::UnexpectedNullCharacter);                  slf.state = State::ScriptDataEscaped; -                slf.emitter.emit_string("\u{fffd}"); +                slf.emit_char('\u{fffd}');                  Ok(ControlToken::Continue)              }              None => { @@ -509,13 +509,13 @@ where              }              Some(x) => {                  slf.state = State::ScriptDataEscaped; -                slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }          },          State::ScriptDataEscapedDashDash => match slf.read_char()? 
{              Some('-') => { -                slf.emitter.emit_string("-"); +                slf.emit_char('-');                  Ok(ControlToken::Continue)              }              Some('<') => { @@ -524,13 +524,13 @@ where              }              Some('>') => {                  slf.state = State::ScriptData; -                slf.emitter.emit_string(">"); +                slf.emit_char('>');                  Ok(ControlToken::Continue)              }              Some('\0') => {                  slf.emit_error(Error::UnexpectedNullCharacter);                  slf.state = State::ScriptDataEscaped; -                slf.emitter.emit_string("\u{fffd}"); +                slf.emit_char('\u{fffd}');                  Ok(ControlToken::Continue)              }              None => { @@ -539,7 +539,7 @@ where              }              Some(x) => {                  slf.state = State::ScriptDataEscaped; -                slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }          }, @@ -551,13 +551,13 @@ where              }              Some(x) if x.is_ascii_alphabetic() => {                  slf.temporary_buffer.clear(); -                slf.emitter.emit_string("<"); +                slf.emit_char('<');                  slf.state = State::ScriptDataDoubleEscapeStart;                  slf.unread_char(Some(x));                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_string("<"); +                slf.emit_char('<');                  slf.state = State::ScriptDataEscaped;                  slf.unread_char(c);                  Ok(ControlToken::Continue) @@ -571,7 +571,7 @@ where                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_string("</"); +                slf.emit_chars(b"</");                  slf.unread_char(c);                  slf.state = State::ScriptDataEscaped;        
          Ok(ControlToken::Continue) @@ -597,7 +597,7 @@ where                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_string("</"); +                slf.emit_chars(b"</");                  slf.flush_buffer_characters();                  slf.state = State::ScriptDataEscaped;                  slf.unread_char(c); @@ -611,12 +611,12 @@ where                  } else {                      slf.state = State::ScriptDataEscaped;                  } -                slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }              Some(x) if x.is_ascii_alphabetic() => {                  slf.temporary_buffer.push(x.to_ascii_lowercase()); -                slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }              c => { @@ -628,17 +628,17 @@ where          State::ScriptDataDoubleEscaped => match slf.read_char()? 
{              Some('-') => {                  slf.state = State::ScriptDataDoubleEscapedDash; -                slf.emitter.emit_string("-"); +                slf.emit_char('-');                  Ok(ControlToken::Continue)              }              Some('<') => {                  slf.state = State::ScriptDataDoubleEscapedLessThanSign; -                slf.emitter.emit_string("<"); +                slf.emit_char('<');                  Ok(ControlToken::Continue)              }              Some('\0') => {                  slf.emit_error(Error::UnexpectedNullCharacter); -                slf.emitter.emit_string("\u{fffd}"); +                slf.emit_char('\u{fffd}');                  Ok(ControlToken::Continue)              }              None => { @@ -646,25 +646,25 @@ where                  Ok(ControlToken::Eof)              }              Some(x) => { -                slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }          },          State::ScriptDataDoubleEscapedDash => match slf.read_char()? 
{              Some('-') => {                  slf.state = State::ScriptDataDoubleEscapedDashDash; -                slf.emitter.emit_string("-"); +                slf.emit_char('-');                  Ok(ControlToken::Continue)              }              Some('<') => {                  slf.state = State::ScriptDataDoubleEscapedLessThanSign; -                slf.emitter.emit_string("<"); +                slf.emit_char('<');                  Ok(ControlToken::Continue)              }              Some('\0') => {                  slf.emit_error(Error::UnexpectedNullCharacter);                  slf.state = State::ScriptDataDoubleEscaped; -                slf.emitter.emit_string("\u{fffd}"); +                slf.emit_char('\u{fffd}');                  Ok(ControlToken::Continue)              }              None => { @@ -673,29 +673,29 @@ where              }              Some(x) => {                  slf.state = State::ScriptDataDoubleEscaped; -                slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }          },          State::ScriptDataDoubleEscapedDashDash => match slf.read_char()? 
{              Some('-') => { -                slf.emitter.emit_string("-"); +                slf.emit_char('-');                  Ok(ControlToken::Continue)              }              Some('<') => { -                slf.emitter.emit_string("<"); +                slf.emit_char('<');                  slf.state = State::ScriptDataDoubleEscapedLessThanSign;                  Ok(ControlToken::Continue)              }              Some('>') => { -                slf.emitter.emit_string(">"); +                slf.emit_char('>');                  slf.state = State::ScriptData;                  Ok(ControlToken::Continue)              }              Some('\0') => {                  slf.emit_error(Error::UnexpectedNullCharacter);                  slf.state = State::ScriptDataDoubleEscaped; -                slf.emitter.emit_string("\u{fffd}"); +                slf.emit_char('\u{fffd}');                  Ok(ControlToken::Continue)              }              None => { @@ -704,7 +704,7 @@ where              }              Some(x) => {                  slf.state = State::ScriptDataDoubleEscaped; -                slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }          }, @@ -712,7 +712,7 @@ where              Some('/') => {                  slf.temporary_buffer.clear();                  slf.state = State::ScriptDataDoubleEscapeEnd; -                slf.emitter.emit_string("/"); +                slf.emit_char('/');                  Ok(ControlToken::Continue)              }              c => { @@ -729,12 +729,12 @@ where                      slf.state = State::ScriptDataDoubleEscaped;                  } -                slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }              Some(x) if x.is_ascii_alphabetic() => {                  slf.temporary_buffer.push(x.to_ascii_lowercase()); -                
slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }              c => { @@ -1741,7 +1741,7 @@ where                  Ok(ControlToken::Eof)              }              Some(x) => { -                slf.emitter.emit_string(ctostr!(x)); +                slf.emit_char(x);                  Ok(ControlToken::Continue)              }          }, @@ -1751,7 +1751,7 @@ where                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_string("]"); +                slf.emit_char(']');                  slf.state = State::CdataSection;                  slf.unread_char(c);                  Ok(ControlToken::Continue) @@ -1759,7 +1759,7 @@ where          },          State::CdataSectionEnd => match slf.read_char()? {              Some(']') => { -                slf.emitter.emit_string("]"); +                slf.emit_char(']');                  Ok(ControlToken::Continue)              }              Some('>') => { @@ -1767,7 +1767,7 @@ where                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_string("]]"); +                slf.emit_chars(b"]]");                  slf.unread_char(c);                  slf.state = State::CdataSection;                  Ok(ControlToken::Continue) @@ -1846,7 +1846,7 @@ where                  if slf.is_consumed_as_part_of_an_attribute() {                      slf.emitter.push_attribute_value(ctostr!(x));                  } else { -                    slf.emitter.emit_string(ctostr!(x)); +                    slf.emit_char(x);                  }                  Ok(ControlToken::Continue) diff --git a/src/tokenizer/machine/utils.rs b/src/tokenizer/machine/utils.rs index 6e45f4d..d96e50b 100644 --- a/src/tokenizer/machine/utils.rs +++ b/src/tokenizer/machine/utils.rs @@ -13,6 +13,24 @@ where      O: Offset,      E: Emitter<O>,  { +    /// Emits the given character as a 
character token. +    #[inline] +    pub(super) fn emit_char(&mut self, c: char) { +        self.emitter.emit_string(ctostr!(c)); +    } + +    /// Emits every byte of the given byte slice as a character token. +    /// +    /// (We're operating on bytes to enable compiler optimization, +    /// since [`str::chars`] isn't `const`.) +    #[inline] +    pub(super) fn emit_chars(&mut self, s: &[u8]) { +        self.emitter.emit_string( +            // this unsafe block is only temporary and will be removed in the next commit +            unsafe { std::str::from_utf8_unchecked(s) }, +        ); +    } +      #[inline]      pub(crate) fn emit_error(&mut self, error: Error) {          let span = match error { | 
