diff options
author | Martin Fischer <martin@push-f.com> | 2023-09-15 09:40:55 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-09-28 10:36:08 +0200 |
commit | d46de6ab592e57a31fef13cfc015c4ce818e8f47 (patch) | |
tree | 814654b579f0a1754193a40786f09a711adc021a /src/tokenizer/machine | |
parent | 2e986862920b438b253fd6e6f11d8f4e5d6f4e27 (diff) |
feat: add span to Trace::Char
Diffstat (limited to 'src/tokenizer/machine')
-rw-r--r-- | src/tokenizer/machine/utils.rs | 30 |
1 files changed, 24 insertions, 6 deletions
diff --git a/src/tokenizer/machine/utils.rs b/src/tokenizer/machine/utils.rs index 9752746..4d59282 100644 --- a/src/tokenizer/machine/utils.rs +++ b/src/tokenizer/machine/utils.rs @@ -17,20 +17,40 @@ where self.reader.position() } + /// Emits the given character as a character token, with its span set according to the given source character. + /// + /// This method should only be used if `c != source_char`, otherwise [`Machine::emit_char`] should be used instead. + #[inline] + pub(super) fn emit_char_for_source_char(&mut self, c: char, source_char: char) { + let pos = self.reader.position(); + self.emitter.emit_char( + c, + pos - self.reader.len_of_char_in_current_encoding(source_char)..pos, + ); + } + /// Emits the given character as a character token. + /// + /// The character MUST have been present literally in the read input. #[inline] pub(super) fn emit_char(&mut self, c: char) { - self.emitter.emit_char(c); + self.emit_char_for_source_char(c, c); } /// Emits every byte of the given byte slice as a character token. /// + /// Every byte MUST have been literally present as a character in the read input. + /// /// (We're operating on bytes to enable compiler optimization, /// since [`str::chars`] isn't `const`.) #[inline] pub(super) fn emit_chars(&mut self, s: &[u8]) { + let mut start = self.some_offset; + for c in s { - self.emit_char(*c as char); + let end = start + self.reader.len_of_char_in_current_encoding(*c as char); + self.emitter.emit_char(*c as char, start..end); + start = end; } } @@ -207,10 +227,8 @@ where } pub(super) fn flush_buffer_characters(&mut self) { - for c in self.temporary_buffer.chars() { - self.emitter.emit_char(c); - } - self.temporary_buffer.clear(); + let temporary_buffer = std::mem::take(&mut self.temporary_buffer); + self.emit_chars(temporary_buffer.as_bytes()); } } |