diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/machine.rs | 10 |
| -rw-r--r-- | src/tokenizer.rs | 25 |
| -rw-r--r-- | src/utils.rs | 8 |
3 files changed, 27 insertions, 16 deletions
diff --git a/src/machine.rs b/src/machine.rs
index 5991912..9f728dd 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1,16 +1,10 @@
 use crate::entities::try_read_character_reference;
 use crate::utils::{
-    ascii_digit_pat, control_pat, noncharacter_pat, surrogate_pat, whitespace_pat, ControlToken,
-    State,
+    ascii_digit_pat, control_pat, ctostr, noncharacter_pat, surrogate_pat, whitespace_pat,
+    ControlToken, State,
 };
 use crate::{Emitter, Error, Reader, Tokenizer};
 
-macro_rules! ctostr {
-    ($c:expr) => {
-        &*$c.encode_utf8(&mut [0; 4])
-    };
-}
-
 // Note: This is not implemented as a method on Tokenizer because there's fields on Tokenizer that
 // should not be available in this method, such as Tokenizer.to_reconsume or the Reader instance
 #[inline]
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index d7e60ac..7430cbc 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -3,7 +3,7 @@ use crate::utils::{control_pat, noncharacter_pat, surrogate_pat, ControlToken, S
 use crate::{DefaultEmitter, Emitter, Error, Never, Readable, Reader};
 
 // this is a stack that can hold 0 to 2 Ts
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone, Copy)]
 struct Stack2<T: Copy>(Option<(T, Option<T>)>);
 
 impl<T: Copy> Stack2<T> {
@@ -26,11 +26,6 @@ impl<T: Copy> Stack2<T> {
         self.0 = new_self;
         rv
     }
-
-    #[inline]
-    fn is_empty(&self) -> bool {
-        matches!(self.0, None)
-    }
 }
 
 /// A HTML tokenizer. See crate-level docs for basic usage.
@@ -154,11 +149,25 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
     #[inline]
     pub(crate) fn try_read_string(
         &mut self,
-        s: &str,
+        mut s: &str,
         case_sensitive: bool,
     ) -> Result<bool, R::Error> {
         debug_assert!(!s.is_empty());
-        debug_assert!(self.to_reconsume.is_empty());
+
+        let to_reconsume_bak = self.to_reconsume;
+        let mut chars = s.chars();
+        while let Some(c) = self.to_reconsume.pop() {
+            if let (Some(x), Some(x2)) = (c, chars.next()) {
+                if x == x2 || (!case_sensitive && x.to_ascii_lowercase() == x2.to_ascii_lowercase()) {
+                    s = &s[x.len_utf8()..];
+                    continue;
+                }
+            }
+
+            self.to_reconsume = to_reconsume_bak;
+            return Ok(false);
+        }
+
         self.reader.try_read_string(s, case_sensitive)
     }
 
diff --git a/src/utils.rs b/src/utils.rs
index 67db1b9..c3d2f1a 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -162,3 +162,11 @@ pub enum ControlToken {
     Eof,
     Continue,
 }
+
+macro_rules! ctostr {
+    ($c:expr) => {
+        &*$c.encode_utf8(&mut [0; 4])
+    };
+}
+
+pub(crate) use ctostr;
