diff options
Diffstat (limited to 'src/tokenizer/machine')
| -rw-r--r-- | src/tokenizer/machine/utils.rs | 30 | 
1 file changed, 24 insertions, 6 deletions
| diff --git a/src/tokenizer/machine/utils.rs b/src/tokenizer/machine/utils.rs index 9752746..4d59282 100644 --- a/src/tokenizer/machine/utils.rs +++ b/src/tokenizer/machine/utils.rs @@ -17,20 +17,40 @@ where          self.reader.position()      } +    /// Emits the given character as a character token, with its span set according to the given source character. +    /// +    /// This method should only be used if `c != source_char`, otherwise [`Machine::emit_char`] should be used instead. +    #[inline] +    pub(super) fn emit_char_for_source_char(&mut self, c: char, source_char: char) { +        let pos = self.reader.position(); +        self.emitter.emit_char( +            c, +            pos - self.reader.len_of_char_in_current_encoding(source_char)..pos, +        ); +    } +      /// Emits the given character as a character token. +    /// +    /// The character MUST have been present literally in the read input.      #[inline]      pub(super) fn emit_char(&mut self, c: char) { -        self.emitter.emit_char(c); +        self.emit_char_for_source_char(c, c);      }      /// Emits every byte of the given byte slice as a character token.      /// +    /// Every byte MUST have been literally present as a character in the read input. +    ///      /// (We're operating on bytes to enable compiler optimization,      /// since [`str::chars`] isn't `const`.)      
#[inline]      pub(super) fn emit_chars(&mut self, s: &[u8]) { +        let mut start = self.some_offset; +          for c in s { -            self.emit_char(*c as char); +            let end = start + self.reader.len_of_char_in_current_encoding(*c as char); +            self.emitter.emit_char(*c as char, start..end); +            start = end;          }      } @@ -207,10 +227,8 @@ where      }      pub(super) fn flush_buffer_characters(&mut self) { -        for c in self.temporary_buffer.chars() { -            self.emitter.emit_char(c); -        } -        self.temporary_buffer.clear(); +        let temporary_buffer = std::mem::take(&mut self.temporary_buffer); +        self.emit_chars(temporary_buffer.as_bytes());      }  } | 
