From 95afc5359e940398498310d46e81352f04b43a49 Mon Sep 17 00:00:00 2001
From: Markus Unterwaditzer <markus-honeypot@unterwaditzer.net>
Date: Sat, 27 Nov 2021 23:33:26 +0100
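Subject: fix crash in try_read_string

Remove the debug assertion in Tokenizer::try_read_string that required
to_reconsume to be empty. Characters pending in to_reconsume are now
matched against the start of the string first (honoring case_sensitive);
on a mismatch to_reconsume is restored from a backup copy and Ok(false)
is returned. The remainder of the string is then passed to the reader.

To allow the backup copy, Stack2 now derives Clone and Copy; its is_empty
method is removed.

Also move the ctostr macro from machine.rs to utils.rs and re-export it
with pub(crate).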
---
 src/machine.rs   | 10 ++--------
 src/tokenizer.rs | 25 +++++++++++++++++--------
 src/utils.rs     |  8 ++++++++
 3 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/src/machine.rs b/src/machine.rs
index 5991912..9f728dd 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1,16 +1,10 @@
 use crate::entities::try_read_character_reference;
 use crate::utils::{
-    ascii_digit_pat, control_pat, noncharacter_pat, surrogate_pat, whitespace_pat, ControlToken,
-    State,
+    ascii_digit_pat, control_pat, ctostr, noncharacter_pat, surrogate_pat, whitespace_pat,
+    ControlToken, State,
 };
 use crate::{Emitter, Error, Reader, Tokenizer};
 
-macro_rules! ctostr {
-    ($c:expr) => {
-        &*$c.encode_utf8(&mut [0; 4])
-    };
-}
-
 // Note: This is not implemented as a method on Tokenizer because there's fields on Tokenizer that
 // should not be available in this method, such as Tokenizer.to_reconsume or the Reader instance
 #[inline]
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index d7e60ac..7430cbc 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -3,7 +3,7 @@ use crate::utils::{control_pat, noncharacter_pat, surrogate_pat, ControlToken, S
 use crate::{DefaultEmitter, Emitter, Error, Never, Readable, Reader};
 
 // this is a stack that can hold 0 to 2 Ts
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone, Copy)]
 struct Stack2<T: Copy>(Option<(T, Option<T>)>);
 
 impl<T: Copy> Stack2<T> {
@@ -26,11 +26,6 @@ impl<T: Copy> Stack2<T> {
         self.0 = new_self;
         rv
     }
-
-    #[inline]
-    fn is_empty(&self) -> bool {
-        matches!(self.0, None)
-    }
 }
 
 /// A HTML tokenizer. See crate-level docs for basic usage.
@@ -154,11 +149,25 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
     #[inline]
     pub(crate) fn try_read_string(
         &mut self,
-        s: &str,
+        mut s: &str,
         case_sensitive: bool,
     ) -> Result<bool, R::Error> {
         debug_assert!(!s.is_empty());
-        debug_assert!(self.to_reconsume.is_empty());
+
+        let to_reconsume_bak = self.to_reconsume;
+        let mut chars = s.chars();
+        while let Some(c) = self.to_reconsume.pop() {
+            if let (Some(x), Some(x2)) = (c, chars.next()) {
+                if x == x2 || (!case_sensitive && x.to_ascii_lowercase() == x2.to_ascii_lowercase()) {
+                    s = &s[x.len_utf8()..];
+                    continue;
+                }
+            }
+
+            self.to_reconsume = to_reconsume_bak;
+            return Ok(false);
+        }
+
         self.reader.try_read_string(s, case_sensitive)
     }
 
diff --git a/src/utils.rs b/src/utils.rs
index 67db1b9..c3d2f1a 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -162,3 +162,11 @@ pub enum ControlToken {
     Eof,
     Continue,
 }
+
+macro_rules! ctostr {
+    ($c:expr) => {
+        &*$c.encode_utf8(&mut [0; 4])
+    };
+}
+
+pub(crate) use ctostr;
-- 
cgit v1.2.3