about summary refs log tree commit diff
path: root/src/tokenizer/machine
diff options
context:
space:
mode:
Diffstat (limited to 'src/tokenizer/machine')
-rw-r--r-- src/tokenizer/machine/utils.rs 30
1 files changed, 24 insertions, 6 deletions
diff --git a/src/tokenizer/machine/utils.rs b/src/tokenizer/machine/utils.rs
index 9752746..4d59282 100644
--- a/src/tokenizer/machine/utils.rs
+++ b/src/tokenizer/machine/utils.rs
@@ -17,20 +17,40 @@ where
self.reader.position()
}
+ /// Emits `c` as a character token whose span ends at the reader's current position and is as long as `source_char` is in the current encoding.
+ ///
+ /// Use this method only if `c != source_char`; when both are the same character, call [`Machine::emit_char`] instead.
+ #[inline]
+ pub(super) fn emit_char_for_source_char(&mut self, c: char, source_char: char) {
+ let pos = self.reader.position(); // end of the span
+ self.emitter.emit_char(
+ c,
+ pos - self.reader.len_of_char_in_current_encoding(source_char)..pos, // start = end minus the length reported for `source_char`
+ );
+ }
+
/// Emits the given character as a character token.
+ ///
+ /// The character MUST have been literally present in the read input, because its span is computed from `c` itself (see [`Machine::emit_char_for_source_char`]).
#[inline]
pub(super) fn emit_char(&mut self, c: char) {
- self.emitter.emit_char(c);
+ self.emit_char_for_source_char(c, c);
}
/// Emits every byte of the given byte slice as a character token.
///
+ /// Every byte MUST have been literally present as a character in the read input; each byte is converted with `as char` (byte value N becomes U+00NN), so multi-byte UTF-8 sequences are not decoded.
+ ///
/// (We're operating on bytes to enable compiler optimization,
/// since [`str::chars`] isn't `const`.)
#[inline]
pub(super) fn emit_chars(&mut self, s: &[u8]) {
+ let mut start = self.some_offset; // NOTE(review): `some_offset` reads like a placeholder name — confirm it holds the offset at which the first byte of `s` begins
+
for c in s {
- self.emit_char(*c as char);
+ let end = start + self.reader.len_of_char_in_current_encoding(*c as char);
+ self.emitter.emit_char(*c as char, start..end);
+ start = end; // spans are contiguous: the next one begins where this one ended
}
}
@@ -207,10 +227,8 @@ where
}
pub(super) fn flush_buffer_characters(&mut self) {
- for c in self.temporary_buffer.chars() {
- self.emitter.emit_char(c);
- }
- self.temporary_buffer.clear();
+ let temporary_buffer = std::mem::take(&mut self.temporary_buffer); // moves the buffer out and leaves its `Default` value behind, replacing the removed in-place `clear()`
+ self.emit_chars(temporary_buffer.as_bytes()); // NOTE(review): emits one token per byte, whereas the removed loop emitted one per `char` — confirm the buffer only ever holds ASCII
}
}