about summary refs log tree commit diff
path: root/src/reader.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/reader.rs')
-rw-r--r-- src/reader.rs 41
1 files changed, 13 insertions, 28 deletions
diff --git a/src/reader.rs b/src/reader.rs
index d2cae92..7246129 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -4,9 +4,8 @@
pub trait Reader {
/// Return a new character from the input stream.
///
- /// Newlines have to be normalized as described in [Preprocessing the input
- /// stream](https://html.spec.whatwg.org/#preprocessing-the-input-stream), however error
- /// emission is done within the tokenizer.
+ /// The input stream does **not** have to be preprocessed in any way; it can contain standalone
+ /// surrogates and have inconsistent newlines.
fn read_char(&mut self) -> Option<char>;
/// Attempt to read an entire string at once, either case-insensitively or not.
@@ -46,49 +45,35 @@ impl<'a, R: 'a + Reader> Readable<'a> for R {
/// from strings.
pub struct StringReader<'a> {
input: &'a str,
+ cursor: std::str::Chars<'a>,
pos: usize,
}
impl<'a> StringReader<'a> {
fn new(input: &'a str) -> Self {
- StringReader { input, pos: 0 }
- }
-
- fn peek_char(&self) -> Option<char> {
- self.input.get(self.pos..)?.chars().next()
+ let cursor = input.chars();
+ StringReader {
+ input,
+ cursor,
+ pos: 0,
+ }
}
}
impl<'a> Reader for StringReader<'a> {
fn read_char(&mut self) -> Option<char> {
- let mut r1 = match self.peek_char() {
- Some(x) => x,
- None => {
- self.pos += 1;
- return None;
- }
- };
-
- self.pos += r1.len_utf8();
-
- if r1 == '\r' {
- r1 = '\n';
- let r2 = self.peek_char();
- if r2 == Some('\n') {
- self.pos += r2.map(char::len_utf8).unwrap_or(0);
- }
- }
-
- Some(r1)
+ let c = self.cursor.next()?;
+ self.pos += c.len_utf8();
+ Some(c)
}
fn try_read_string(&mut self, s1: &str, case_sensitive: bool) -> bool {
// we do not need to call validate_char here because `s1` hopefully does not contain invalid
// characters
-
if let Some(s2) = self.input.get(self.pos..self.pos + s1.len()) {
if s1 == s2 || (!case_sensitive && s1.eq_ignore_ascii_case(s2)) {
self.pos += s1.len();
+ self.cursor = self.input[self.pos..].chars();
return true;
}
}