diff options
| author | Markus Unterwaditzer <markus-honeypot@unterwaditzer.net> | 2021-11-26 15:02:11 +0100 | 
|---|---|---|
| committer | Markus Unterwaditzer <markus-honeypot@unterwaditzer.net> | 2021-11-26 15:02:11 +0100 | 
| commit | 14e392f8ed9d85a2e1c91857e223473a290ac480 (patch) | |
| tree | 08adb2740af5922f141f682e65744c264f6fe30c /src/reader.rs | |
| parent | c9613f071d36c47b5017e6ba61236fdb395be203 (diff) | |
clean up reader interface
Diffstat (limited to 'src/reader.rs')
| -rw-r--r-- | src/reader.rs | 41 | 
1 file changed, 13 insertions, 28 deletions
```diff
diff --git a/src/reader.rs b/src/reader.rs
index d2cae92..7246129 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -4,9 +4,8 @@
 pub trait Reader {
     /// Return a new character from the input stream.
     ///
-    /// Newlines have to be normalized as described in [Preprocessing the input
-    /// stream](https://html.spec.whatwg.org/#preprocessing-the-input-stream), however error
-    /// emission is done within the tokenizer.
+    /// The input stream does **not** have to be preprocessed in any way, it can contain standalone
+    /// surrogates and have inconsistent newlines.
     fn read_char(&mut self) -> Option<char>;
 
     /// Attempt to read an entire string at once, either case-insensitively or not.
@@ -46,49 +45,35 @@ impl<'a, R: 'a + Reader> Readable<'a> for R {
 /// from strings.
 pub struct StringReader<'a> {
     input: &'a str,
+    cursor: std::str::Chars<'a>,
     pos: usize,
 }
 
 impl<'a> StringReader<'a> {
     fn new(input: &'a str) -> Self {
-        StringReader { input, pos: 0 }
-    }
-
-    fn peek_char(&self) -> Option<char> {
-        self.input.get(self.pos..)?.chars().next()
+        let cursor = input.chars();
+        StringReader {
+            input,
+            cursor,
+            pos: 0,
+        }
     }
 }
 
 impl<'a> Reader for StringReader<'a> {
     fn read_char(&mut self) -> Option<char> {
-        let mut r1 = match self.peek_char() {
-            Some(x) => x,
-            None => {
-                self.pos += 1;
-                return None;
-            }
-        };
-
-        self.pos += r1.len_utf8();
-
-        if r1 == '\r' {
-            r1 = '\n';
-            let r2 = self.peek_char();
-            if r2 == Some('\n') {
-                self.pos += r2.map(char::len_utf8).unwrap_or(0);
-            }
-        }
-
-        Some(r1)
+        let c = self.cursor.next()?;
+        self.pos += c.len_utf8();
+        Some(c)
     }
 
     fn try_read_string(&mut self, s1: &str, case_sensitive: bool) -> bool {
         // we do not need to call validate_char here because `s` hopefully does not contain invalid
         // characters
-
         if let Some(s2) = self.input.get(self.pos..self.pos + s1.len()) {
             if s1 == s2 || (!case_sensitive && s1.eq_ignore_ascii_case(s2)) {
                 self.pos += s1.len();
+                self.cursor = self.input[self.pos..].chars();
                 return true;
             }
         }
```
