diff options
author | Markus Unterwaditzer <markus-honeypot@unterwaditzer.net> | 2021-11-26 15:02:11 +0100 |
---|---|---|
committer | Markus Unterwaditzer <markus-honeypot@unterwaditzer.net> | 2021-11-26 15:02:11 +0100 |
commit | 14e392f8ed9d85a2e1c91857e223473a290ac480 (patch) | |
tree | 08adb2740af5922f141f682e65744c264f6fe30c /src/reader.rs | |
parent | c9613f071d36c47b5017e6ba61236fdb395be203 (diff) |
clean up reader interface
Diffstat (limited to 'src/reader.rs')
-rw-r--r-- | src/reader.rs | 41 |
1 files changed, 13 insertions, 28 deletions
diff --git a/src/reader.rs b/src/reader.rs index d2cae92..7246129 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -4,9 +4,8 @@ pub trait Reader { /// Return a new character from the input stream. /// - /// Newlines have to be normalized as described in [Preprocessing the input - /// stream](https://html.spec.whatwg.org/#preprocessing-the-input-stream), however error - /// emission is done within the tokenizer. + /// The input stream does **not** have to be preprocessed in any way, it can contain standalone + /// surrogates and have inconsistent newlines. fn read_char(&mut self) -> Option<char>; /// Attempt to read an entire string at once, either case-insensitively or not. @@ -46,49 +45,35 @@ impl<'a, R: 'a + Reader> Readable<'a> for R { /// from strings. pub struct StringReader<'a> { input: &'a str, + cursor: std::str::Chars<'a>, pos: usize, } impl<'a> StringReader<'a> { fn new(input: &'a str) -> Self { - StringReader { input, pos: 0 } - } - - fn peek_char(&self) -> Option<char> { - self.input.get(self.pos..)?.chars().next() + let cursor = input.chars(); + StringReader { + input, + cursor, + pos: 0, + } } } impl<'a> Reader for StringReader<'a> { fn read_char(&mut self) -> Option<char> { - let mut r1 = match self.peek_char() { - Some(x) => x, - None => { - self.pos += 1; - return None; - } - }; - - self.pos += r1.len_utf8(); - - if r1 == '\r' { - r1 = '\n'; - let r2 = self.peek_char(); - if r2 == Some('\n') { - self.pos += r2.map(char::len_utf8).unwrap_or(0); - } - } - - Some(r1) + let c = self.cursor.next()?; + self.pos += c.len_utf8(); + Some(c) } fn try_read_string(&mut self, s1: &str, case_sensitive: bool) -> bool { // we do not need to call validate_char here because `s` hopefully does not contain invalid // characters - if let Some(s2) = self.input.get(self.pos..self.pos + s1.len()) { if s1 == s2 || (!case_sensitive && s1.eq_ignore_ascii_case(s2)) { self.pos += s1.len(); + self.cursor = self.input[self.pos..].chars(); return true; } } |