about | summary | refs | log | tree | commit | diff
path: root/src/tokenizer.rs
diff options
context:
space:
mode:
author Markus Unterwaditzer <markus-honeypot@unterwaditzer.net> 2021-11-27 23:33:26 +0100
committer Markus Unterwaditzer <markus-honeypot@unterwaditzer.net> 2021-11-27 23:43:54 +0100
commit 95afc5359e940398498310d46e81352f04b43a49 (patch)
tree b89915478a19f7f8673bb4663e9e7fa112abf09c /src/tokenizer.rs
parent 96808d0d940e1580cf86e433d0c844e943157e0d (diff)
fix crash in try_read_string
Diffstat (limited to 'src/tokenizer.rs')
-rw-r--r-- src/tokenizer.rs 25
1 files changed, 17 insertions, 8 deletions
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index d7e60ac..7430cbc 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -3,7 +3,7 @@ use crate::utils::{control_pat, noncharacter_pat, surrogate_pat, ControlToken, S
use crate::{DefaultEmitter, Emitter, Error, Never, Readable, Reader};
// this is a stack that can hold 0 to 2 Ts
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone, Copy)]
struct Stack2<T: Copy>(Option<(T, Option<T>)>);
impl<T: Copy> Stack2<T> {
@@ -26,11 +26,6 @@ impl<T: Copy> Stack2<T> {
self.0 = new_self;
rv
}
-
- #[inline]
- fn is_empty(&self) -> bool {
- matches!(self.0, None)
- }
}
/// A HTML tokenizer. See crate-level docs for basic usage.
@@ -154,11 +149,25 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
#[inline]
pub(crate) fn try_read_string(
&mut self,
- s: &str,
+ mut s: &str,
case_sensitive: bool,
) -> Result<bool, R::Error> {
debug_assert!(!s.is_empty());
- debug_assert!(self.to_reconsume.is_empty());
+
+ let to_reconsume_bak = self.to_reconsume;
+ let mut chars = s.chars();
+ while let Some(c) = self.to_reconsume.pop() {
+ if let (Some(x), Some(x2)) = (c, chars.next()) {
+ if x == x2 || (!case_sensitive && x.to_ascii_lowercase() == x2.to_ascii_lowercase()) {
+ s = &s[x.len_utf8()..];
+ continue;
+ }
+ }
+
+ self.to_reconsume = to_reconsume_bak;
+ return Ok(false);
+ }
+
self.reader.try_read_string(s, case_sensitive)
}