From ee8ab781672e7ab608e74a5b605eb189828f0afe Mon Sep 17 00:00:00 2001
From: Martin Fischer <martin@push-f.com>
Date: Tue, 26 Sep 2023 08:22:21 +0200
Subject: fix(tokenizer): don't lowercase temp chars in ScriptDataEndTagName

This bug resulted in e.g. "<script></SCRI" being wrongly tokenized as:

    StartTag(StartTag { name: "script", self_closing: false, attributes: {} })
    Char('<')
    Char('/')
    Char('s')
    Char('c')
    Char('r')
    Char('i')
    EndOfFile

Note that the Char tokens should be uppercase.
(This bug could only be observed when properly doing
state switching via tree construction.)
---
 src/tokenizer/machine.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/tokenizer')

diff --git a/src/tokenizer/machine.rs b/src/tokenizer/machine.rs
index 944eb01..100f645 100644
--- a/src/tokenizer/machine.rs
+++ b/src/tokenizer/machine.rs
@@ -428,7 +428,7 @@ where
             }
             Some(x) if x.is_ascii_alphabetic() => {
                 slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
-                slf.temporary_buffer.push(x.to_ascii_lowercase());
+                slf.temporary_buffer.push(x);
                 Ok(ControlToken::Continue)
             }
             c => {
-- 
cgit v1.2.3