From ee8ab781672e7ab608e74a5b605eb189828f0afe Mon Sep 17 00:00:00 2001
From: Martin Fischer <martin@push-f.com>
Date: Tue, 26 Sep 2023 08:22:21 +0200
Subject: fix(tokenizer): don't lowercase temp chars in ScriptDataEndTagName

This bug resulted in e.g. "<script></SCRI" being wrongly tokenized as:

    StartTag(StartTag { name: "script", self_closing: false, attributes: {} })
    Char('<')
    Char('/')
    Char('s')
    Char('c')
    Char('r')
    Char('i')
    EndOfFile

Note that the letter Char tokens should be uppercase ('S', 'C', 'R',
'I'), exactly as they appear in the input. (This bug could only be
observed when properly doing state switching via tree construction.)
---
 src/tokenizer/machine.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/tokenizer')
diff --git a/src/tokenizer/machine.rs b/src/tokenizer/machine.rs
index 944eb01..100f645 100644
--- a/src/tokenizer/machine.rs
+++ b/src/tokenizer/machine.rs
@@ -428,7 +428,7 @@ where
             }
             Some(x) if x.is_ascii_alphabetic() => {
                 slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
-                slf.temporary_buffer.push(x.to_ascii_lowercase());
+                slf.temporary_buffer.push(x);
                 Ok(ControlToken::Continue)
             }
             c => {
-- 
cgit v1.2.3