diff options
author | Martin Fischer <martin@push-f.com> | 2023-09-01 14:59:08 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-09-03 23:00:05 +0200 |
commit | 153acbee5650b9b006c7de215fb1421a64516fb4 (patch) | |
tree | ed363c6e00035cb227b183170bbd42495c84c0ca /src/offset.rs | |
parent | eea8c60fba5962d5575c8515b837fb6304f73c9d (diff) |
fix!: make PosTrackingReader encoding-independent
While much of the span logic currently assumes UTF-8, we also
want to support other character encodings, such as UTF-16,
where characters can take up more or fewer bytes than in UTF-8.
Diffstat (limited to 'src/offset.rs')
-rw-r--r-- | src/offset.rs | 10 |
1 files changed, 8 insertions, 2 deletions
diff --git a/src/offset.rs b/src/offset.rs index 3152c78..bdb069a 100644 --- a/src/offset.rs +++ b/src/offset.rs @@ -98,7 +98,7 @@ impl<R: Reader> Reader for PosTrackingReader<R> { fn read_char(&mut self) -> Result<Option<char>, Self::Error> { match self.reader.read_char()? { Some(char) => { - self.position += char.len_utf8(); + self.position += self.reader.len_of_char_in_current_encoding(char); Ok(Some(char)) } None => Ok(None), @@ -108,10 +108,16 @@ impl<R: Reader> Reader for PosTrackingReader<R> { fn try_read_string(&mut self, s: &str, case_sensitive: bool) -> Result<bool, Self::Error> { match self.reader.try_read_string(s, case_sensitive)? { true => { - self.position += s.len(); + for c in s.chars() { + self.position += self.reader.len_of_char_in_current_encoding(c); + } Ok(true) } false => Ok(false), } } + + fn len_of_char_in_current_encoding(&self, c: char) -> usize { + self.reader.len_of_char_in_current_encoding(c) + } } |