refactor: split up go! macro calls

#!/usr/bin/env python3
"""Split multi-command ``go!(self: a; b; c)`` calls into single-command calls.

Rewrites ``src/tokenizer/mod.rs`` in place and then runs ``cargo fmt`` to
clean up the generated blocks.
"""
import re
import subprocess

SOURCE_PATH = 'src/tokenizer/mod.rs'

# The parentheses are escaped so they match the literal `go!(self: ...)` call.
# The group is greedy and therefore runs to the last `)` on the line, which
# keeps nested parentheses such as `push_tag (c.to_ascii_lowercase())` inside
# the capture.
GO_CALL = re.compile(r'go!\(self:(.*)\)')


def split_go(match):
    """Return the replacement text for one matched ``go!(self: ...)`` call.

    ``match.group(1)`` holds the ``;``-separated commands. Each command is
    emitted as its own ``go!(self: <command>);`` statement and the statements
    are wrapped in a single ``{ ... }`` block. A ``reconsume X`` command is
    expanded to ``self.reconsume = true;`` followed by ``go!(self: to X);``.
    """
    inner = match.group(1)
    stmts = inner.split(';')
    text = '{'
    for stmt in stmts:
        # Commands keep the leading space that followed `self:` or `;`.
        if stmt.startswith(' reconsume '):
            text += 'self.reconsume = true;'
            stmt = ' to' + stmt[len(' reconsume'):]
        text += 'go!(self:{});'.format(stmt)
    return text + '}'


def rewrite_line(line):
    """Return ``line`` with its ``go!`` call split, or unchanged for macro rules."""
    if '$me:ident' in line:
        # skip macro rules
        return line
    return GO_CALL.sub(split_go, line)


def main():
    """Rewrite the tokenizer source in place, then reformat it."""
    text = ''
    with open(SOURCE_PATH) as f:
        for line in f:
            text += rewrite_line(line)

    with open(SOURCE_PATH, 'w') as f:
        f.write(text)

    subprocess.call(['cargo', 'fmt'])


if __name__ == '__main__':
    main()
author: Martin Fischer <martin@push-f.com> 2021-11-28 10:07:00 +0100
committer: Martin Fischer <martin@push-f.com> 2021-11-29 15:11:01 +0100
commit: 2b84e92f4f0981b099f30759330d00a73c90dee8 (patch)
tree: 31d39077aa50944ff9bbdd784f316b5811d9c31c
parent: 8c3be2ac471edc6f24059d552e82fb5acc3d7cc2 (diff)
1 files changed, 841 insertions, 221 deletions
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index a8ec39f..fc8bd7f 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -676,10 +676,19 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ data-state
             states::Data => loop {
                 match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n')) {
-                    FromSet('\0') => go!(self: error; emit '\0'),
-                    FromSet('&') => go!(self: consume_char_ref),
-                    FromSet('<') => go!(self: to TagOpen),
-                    FromSet(c) => go!(self: emit c),
+                    FromSet('\0') => {
+                        go!(self: error);
+                        go!(self: emit '\0');
+                    }
+                    FromSet('&') => {
+                        go!(self: consume_char_ref);
+                    }
+                    FromSet('<') => {
+                        go!(self: to TagOpen);
+                    }
+                    FromSet(c) => {
+                        go!(self: emit c);
+                    }
                     NotFromSet(b) => self.emit_chars(b),
                 }
             },
@@ -687,10 +696,19 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ rcdata-state
             states::RawData(Rcdata) => loop {
                 match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n')) {
-                    FromSet('\0') => go!(self: error; emit '\u{fffd}'),
-                    FromSet('&') => go!(self: consume_char_ref),
-                    FromSet('<') => go!(self: to RawLessThanSign Rcdata),
-                    FromSet(c) => go!(self: emit c),
+                    FromSet('\0') => {
+                        go!(self: error);
+                        go!(self: emit '\u{fffd}');
+                    }
+                    FromSet('&') => {
+                        go!(self: consume_char_ref);
+                    }
+                    FromSet('<') => {
+                        go!(self: to RawLessThanSign Rcdata);
+                    }
+                    FromSet(c) => {
+                        go!(self: emit c);
+                    }
                     NotFromSet(b) => self.emit_chars(b),
                 }
             },
@@ -698,9 +716,16 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ rawtext-state
             states::RawData(Rawtext) => loop {
                 match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n')) {
-                    FromSet('\0') => go!(self: error; emit '\u{fffd}'),
-                    FromSet('<') => go!(self: to RawLessThanSign Rawtext),
-                    FromSet(c) => go!(self: emit c),
+                    FromSet('\0') => {
+                        go!(self: error);
+                        go!(self: emit '\u{fffd}');
+                    }
+                    FromSet('<') => {
+                        go!(self: to RawLessThanSign Rawtext);
+                    }
+                    FromSet(c) => {
+                        go!(self: emit c);
+                    }
                     NotFromSet(b) => self.emit_chars(b),
                 }
             },
@@ -708,9 +733,16 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ script-data-state
             states::RawData(ScriptData) => loop {
                 match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n')) {
-                    FromSet('\0') => go!(self: error; emit '\u{fffd}'),
-                    FromSet('<') => go!(self: to RawLessThanSign ScriptData),
-                    FromSet(c) => go!(self: emit c),
+                    FromSet('\0') => {
+                        go!(self: error);
+                        go!(self: emit '\u{fffd}');
+                    }
+                    FromSet('<') => {
+                        go!(self: to RawLessThanSign ScriptData);
+                    }
+                    FromSet(c) => {
+                        go!(self: emit c);
+                    }
                     NotFromSet(b) => self.emit_chars(b),
                 }
             },
@@ -718,10 +750,20 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ script-data-escaped-state
             states::RawData(ScriptDataEscaped(Escaped)) => loop {
                 match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n')) {
-                    FromSet('\0') => go!(self: error; emit '\u{fffd}'),
-                    FromSet('-') => go!(self: emit '-'; to ScriptDataEscapedDash Escaped),
-                    FromSet('<') => go!(self: to RawLessThanSign ScriptDataEscaped Escaped),
-                    FromSet(c) => go!(self: emit c),
+                    FromSet('\0') => {
+                        go!(self: error);
+                        go!(self: emit '\u{fffd}');
+                    }
+                    FromSet('-') => {
+                        go!(self: emit '-');
+                        go!(self: to ScriptDataEscapedDash Escaped);
+                    }
+                    FromSet('<') => {
+                        go!(self: to RawLessThanSign ScriptDataEscaped Escaped);
+                    }
+                    FromSet(c) => {
+                        go!(self: emit c);
+                    }
                     NotFromSet(b) => self.emit_chars(b),
                 }
             },
@@ -729,12 +771,21 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ script-data-double-escaped-state
             states::RawData(ScriptDataEscaped(DoubleEscaped)) => loop {
                 match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n')) {
-                    FromSet('\0') => go!(self: error; emit '\u{fffd}'),
-                    FromSet('-') => go!(self: emit '-'; to ScriptDataEscapedDash DoubleEscaped),
+                    FromSet('\0') => {
+                        go!(self: error);
+                        go!(self: emit '\u{fffd}');
+                    }
+                    FromSet('-') => {
+                        go!(self: emit '-');
+                        go!(self: to ScriptDataEscapedDash DoubleEscaped);
+                    }
                     FromSet('<') => {
-                        go!(self: emit '<'; to RawLessThanSign ScriptDataEscaped DoubleEscaped)
+                        go!(self: emit '<');
+                        go!(self: to RawLessThanSign ScriptDataEscaped DoubleEscaped);
+                    }
+                    FromSet(c) => {
+                        go!(self: emit c);
                     }
-                    FromSet(c) => go!(self: emit c),
                     NotFromSet(b) => self.emit_chars(b),
                 }
             },
@@ -742,8 +793,13 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ plaintext-state
             states::Plaintext => loop {
                 match pop_except_from!(self, input, small_char_set!('\r' '\0' '\n')) {
-                    FromSet('\0') => go!(self: error; emit '\u{fffd}'),
-                    FromSet(c) => go!(self: emit c),
+                    FromSet('\0') => {
+                        go!(self: error);
+                        go!(self: emit '\u{fffd}');
+                    }
+                    FromSet(c) => {
+                        go!(self: emit c);
+                    }
                     NotFromSet(b) => self.emit_chars(b),
                 }
             },
@@ -751,12 +807,30 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ tag-open-state
             states::TagOpen => loop {
                 match get_char!(self, input) {
-                    '!' => go!(self: clear_temp; to MarkupDeclarationOpen),
-                    '/' => go!(self: to EndTagOpen),
-                    '?' => go!(self: error; clear_comment; push_comment '?'; to BogusComment),
+                    '!' => {
+                        go!(self: clear_temp);
+                        go!(self: to MarkupDeclarationOpen);
+                    }
+                    '/' => {
+                        go!(self: to EndTagOpen);
+                    }
+                    '?' => {
+                        go!(self: error);
+                        go!(self: clear_comment);
+                        go!(self: push_comment '?');
+                        go!(self: to BogusComment);
+                    }
                     c => match lower_ascii_letter(c) {
-                        Some(cl) => go!(self: create_tag StartTag cl; to TagName),
-                        None => go!(self: error; emit '<'; reconsume Data),
+                        Some(cl) => {
+                            go!(self: create_tag StartTag cl);
+                            go!(self: to TagName);
+                        }
+                        None => {
+                            go!(self: error);
+                            go!(self: emit '<');
+                            self.reconsume = true;
+                            go!(self: to Data);
+                        }
                     },
                 }
             },
@@ -764,13 +838,27 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ end-tag-open-state
             states::EndTagOpen => loop {
                 match get_char!(self, input) {
-                    '>' => go!(self: error; to Data),
+                    '>' => {
+                        go!(self: error);
+                        go!(self: to Data);
+                    }
                     '\0' => {
-                        go!(self: error; clear_comment; push_comment '\u{fffd}'; to BogusComment)
+                        go!(self: error);
+                        go!(self: clear_comment);
+                        go!(self: push_comment '\u{fffd}');
+                        go!(self: to BogusComment);
                     }
                     c => match lower_ascii_letter(c) {
-                        Some(cl) => go!(self: create_tag EndTag cl; to TagName),
-                        None => go!(self: error; clear_comment; push_comment c; to BogusComment),
+                        Some(cl) => {
+                            go!(self: create_tag EndTag cl);
+                            go!(self: to TagName);
+                        }
+                        None => {
+                            go!(self: error);
+                            go!(self: clear_comment);
+                            go!(self: push_comment c);
+                            go!(self: to BogusComment);
+                        }
                     },
                 }
             },
@@ -778,23 +866,45 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ tag-name-state
             states::TagName => loop {
                 match get_char!(self, input) {
-                    '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName),
-                    '/' => go!(self: to SelfClosingStartTag),
-                    '>' => go!(self: emit_tag Data),
-                    '\0' => go!(self: error; push_tag '\u{fffd}'),
-                    c => go!(self: push_tag (c.to_ascii_lowercase())),
+                    '\t' | '\n' | '\x0C' | ' ' => {
+                        go!(self: to BeforeAttributeName);
+                    }
+                    '/' => {
+                        go!(self: to SelfClosingStartTag);
+                    }
+                    '>' => {
+                        go!(self: emit_tag Data);
+                    }
+                    '\0' => {
+                        go!(self: error);
+                        go!(self: push_tag '\u{fffd}');
+                    }
+                    c => {
+                        go!(self: push_tag (c.to_ascii_lowercase()));
+                    }
                 }
             },
 
             //§ script-data-escaped-less-than-sign-state
             states::RawLessThanSign(ScriptDataEscaped(Escaped)) => loop {
                 match get_char!(self, input) {
-                    '/' => go!(self: clear_temp; to RawEndTagOpen ScriptDataEscaped Escaped),
+                    '/' => {
+                        go!(self: clear_temp);
+                        go!(self: to RawEndTagOpen ScriptDataEscaped Escaped);
+                    }
                     c => match lower_ascii_letter(c) {
                         Some(cl) => {
-                            go!(self: clear_temp; push_temp cl; emit '<'; emit c; to ScriptDataEscapeStart DoubleEscaped)
+                            go!(self: clear_temp);
+                            go!(self: push_temp cl);
+                            go!(self: emit '<');
+                            go!(self: emit c);
+                            go!(self: to ScriptDataEscapeStart DoubleEscaped);
+                        }
+                        None => {
+                            go!(self: emit '<');
+                            self.reconsume = true;
+                            go!(self: to RawData ScriptDataEscaped Escaped);
                         }
-                        None => go!(self: emit '<'; reconsume RawData ScriptDataEscaped Escaped),
                     },
                 }
             },
@@ -802,8 +912,15 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ script-data-double-escaped-less-than-sign-state
             states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => loop {
                 match get_char!(self, input) {
-                    '/' => go!(self: clear_temp; emit '/'; to ScriptDataDoubleEscapeEnd),
-                    _ => go!(self: reconsume RawData ScriptDataEscaped DoubleEscaped),
+                    '/' => {
+                        go!(self: clear_temp);
+                        go!(self: emit '/');
+                        go!(self: to ScriptDataDoubleEscapeEnd);
+                    }
+                    _ => {
+                        self.reconsume = true;
+                        go!(self: to RawData ScriptDataEscaped DoubleEscaped);
+                    }
                 }
             },
 
@@ -811,11 +928,20 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             // otherwise
             states::RawLessThanSign(kind) => loop {
                 match get_char!(self, input) {
-                    '/' => go!(self: clear_temp; to RawEndTagOpen kind),
+                    '/' => {
+                        go!(self: clear_temp);
+                        go!(self: to RawEndTagOpen kind);
+                    }
                     '!' if kind == ScriptData => {
-                        go!(self: emit '<'; emit '!'; to ScriptDataEscapeStart Escaped)
+                        go!(self: emit '<');
+                        go!(self: emit '!');
+                        go!(self: to ScriptDataEscapeStart Escaped);
+                    }
+                    _ => {
+                        go!(self: emit '<');
+                        self.reconsume = true;
+                        go!(self: to RawData kind);
                     }
-                    _ => go!(self: emit '<'; reconsume RawData kind),
                 }
             },
 
@@ -823,8 +949,17 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             states::RawEndTagOpen(kind) => loop {
                 let c = get_char!(self, input);
                 match lower_ascii_letter(c) {
-                    Some(cl) => go!(self: create_tag EndTag cl; push_temp c; to RawEndTagName kind),
-                    None => go!(self: emit '<'; emit '/'; reconsume RawData kind),
+                    Some(cl) => {
+                        go!(self: create_tag EndTag cl);
+                        go!(self: push_temp c);
+                        go!(self: to RawEndTagName kind);
+                    }
+                    None => {
+                        go!(self: emit '<');
+                        go!(self: emit '/');
+                        self.reconsume = true;
+                        go!(self: to RawData kind);
+                    }
                 }
             },
 
@@ -833,17 +968,31 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
                 let c = get_char!(self, input);
                 if self.have_appropriate_end_tag() {
                     match c {
-                        '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName),
-                        '/' => go!(self: to SelfClosingStartTag),
-                        '>' => go!(self: emit_tag Data),
+                        '\t' | '\n' | '\x0C' | ' ' => {
+                            go!(self: to BeforeAttributeName);
+                        }
+                        '/' => {
+                            go!(self: to SelfClosingStartTag);
+                        }
+                        '>' => {
+                            go!(self: emit_tag Data);
+                        }
                         _ => (),
                     }
                 }
 
                 match lower_ascii_letter(c) {
-                    Some(cl) => go!(self: push_tag cl; push_temp c),
+                    Some(cl) => {
+                        go!(self: push_tag cl);
+                        go!(self: push_temp c);
+                    }
                     None => {
-                        go!(self: discard_tag; emit '<'; emit '/'; emit_temp; reconsume RawData kind)
+                        go!(self: discard_tag);
+                        go!(self: emit '<');
+                        go!(self: emit '/');
+                        go!(self: emit_temp);
+                        self.reconsume = true;
+                        go!(self: to RawData kind);
                     }
                 }
             },
@@ -858,11 +1007,20 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
                         } else {
                             Escaped
                         };
-                        go!(self: emit c; to RawData ScriptDataEscaped esc);
+                        {
+                            go!(self: emit c);
+                            go!(self: to RawData ScriptDataEscaped esc);
+                        };
                     }
                     _ => match lower_ascii_letter(c) {
-                        Some(cl) => go!(self: push_temp cl; emit c),
-                        None => go!(self: reconsume RawData ScriptDataEscaped Escaped),
+                        Some(cl) => {
+                            go!(self: push_temp cl);
+                            go!(self: emit c);
+                        }
+                        None => {
+                            self.reconsume = true;
+                            go!(self: to RawData ScriptDataEscaped Escaped);
+                        }
                     },
                 }
             },
@@ -870,47 +1028,89 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ script-data-escape-start-state
             states::ScriptDataEscapeStart(Escaped) => loop {
                 match get_char!(self, input) {
-                    '-' => go!(self: emit '-'; to ScriptDataEscapeStartDash),
-                    _ => go!(self: reconsume RawData ScriptData),
+                    '-' => {
+                        go!(self: emit '-');
+                        go!(self: to ScriptDataEscapeStartDash);
+                    }
+                    _ => {
+                        self.reconsume = true;
+                        go!(self: to RawData ScriptData);
+                    }
                 }
             },
 
             //§ script-data-escape-start-dash-state
             states::ScriptDataEscapeStartDash => loop {
                 match get_char!(self, input) {
-                    '-' => go!(self: emit '-'; to ScriptDataEscapedDashDash Escaped),
-                    _ => go!(self: reconsume RawData ScriptData),
+                    '-' => {
+                        go!(self: emit '-');
+                        go!(self: to ScriptDataEscapedDashDash Escaped);
+                    }
+                    _ => {
+                        self.reconsume = true;
+                        go!(self: to RawData ScriptData);
+                    }
                 }
             },
 
             //§ script-data-escaped-dash-state script-data-double-escaped-dash-state
             states::ScriptDataEscapedDash(kind) => loop {
                 match get_char!(self, input) {
-                    '-' => go!(self: emit '-'; to ScriptDataEscapedDashDash kind),
+                    '-' => {
+                        go!(self: emit '-');
+                        go!(self: to ScriptDataEscapedDashDash kind);
+                    }
                     '<' => {
                         if kind == DoubleEscaped {
-                            go!(self: emit '<');
+                            {
+                                go!(self: emit '<');
+                            };
                         }
-                        go!(self: to RawLessThanSign ScriptDataEscaped kind);
+                        {
+                            go!(self: to RawLessThanSign ScriptDataEscaped kind);
+                        };
+                    }
+                    '\0' => {
+                        go!(self: error);
+                        go!(self: emit '\u{fffd}');
+                        go!(self: to RawData ScriptDataEscaped kind);
+                    }
+                    c => {
+                        go!(self: emit c);
+                        go!(self: to RawData ScriptDataEscaped kind);
                     }
-                    '\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind),
-                    c => go!(self: emit c; to RawData ScriptDataEscaped kind),
                 }
             },
 
             //§ script-data-escaped-dash-dash-state script-data-double-escaped-dash-dash-state
             states::ScriptDataEscapedDashDash(kind) => loop {
                 match get_char!(self, input) {
-                    '-' => go!(self: emit '-'),
+                    '-' => {
+                        go!(self: emit '-');
+                    }
                     '<' => {
                         if kind == DoubleEscaped {
-                            go!(self: emit '<');
+                            {
+                                go!(self: emit '<');
+                            };
                         }
-                        go!(self: to RawLessThanSign ScriptDataEscaped kind);
+                        {
+                            go!(self: to RawLessThanSign ScriptDataEscaped kind);
+                        };
+                    }
+                    '>' => {
+                        go!(self: emit '>');
+                        go!(self: to RawData ScriptData);
+                    }
+                    '\0' => {
+                        go!(self: error);
+                        go!(self: emit '\u{fffd}');
+                        go!(self: to RawData ScriptDataEscaped kind);
+                    }
+                    c => {
+                        go!(self: emit c);
+                        go!(self: to RawData ScriptDataEscaped kind);
                     }
-                    '>' => go!(self: emit '>'; to RawData ScriptData),
-                    '\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind),
-                    c => go!(self: emit c; to RawData ScriptDataEscaped kind),
                 }
             },
 
@@ -924,11 +1124,20 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
                         } else {
                             DoubleEscaped
                         };
-                        go!(self: emit c; to RawData ScriptDataEscaped esc);
+                        {
+                            go!(self: emit c);
+                            go!(self: to RawData ScriptDataEscaped esc);
+                        };
                     }
                     _ => match lower_ascii_letter(c) {
-                        Some(cl) => go!(self: push_temp cl; emit c),
-                        None => go!(self: reconsume RawData ScriptDataEscaped DoubleEscaped),
+                        Some(cl) => {
+                            go!(self: push_temp cl);
+                            go!(self: emit c);
+                        }
+                        None => {
+                            self.reconsume = true;
+                            go!(self: to RawData ScriptDataEscaped DoubleEscaped);
+                        }
                     },
                 }
             },
@@ -937,15 +1146,29 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             states::BeforeAttributeName => loop {
                 match get_char!(self, input) {
                     '\t' | '\n' | '\x0C' | ' ' => (),
-                    '/' => go!(self: to SelfClosingStartTag),
-                    '>' => go!(self: emit_tag Data),
-                    '\0' => go!(self: error; create_attr '\u{fffd}'; to AttributeName),
+                    '/' => {
+                        go!(self: to SelfClosingStartTag);
+                    }
+                    '>' => {
+                        go!(self: emit_tag Data);
+                    }
+                    '\0' => {
+                        go!(self: error);
+                        go!(self: create_attr '\u{fffd}');
+                        go!(self: to AttributeName);
+                    }
                     c => match lower_ascii_letter(c) {
-                        Some(cl) => go!(self: create_attr cl; to AttributeName),
+                        Some(cl) => {
+                            go!(self: create_attr cl);
+                            go!(self: to AttributeName);
+                        }
                         None => {
                             go_match!(self: c,
                             '"' , '\'' , '<' , '=' => error);
-                            go!(self: create_attr c; to AttributeName);
+                            {
+                                go!(self: create_attr c);
+                                go!(self: to AttributeName);
+                            };
                         }
                     },
                 }
@@ -954,17 +1177,32 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ attribute-name-state
             states::AttributeName => loop {
                 match get_char!(self, input) {
-                    '\t' | '\n' | '\x0C' | ' ' => go!(self: to AfterAttributeName),
-                    '/' => go!(self: to SelfClosingStartTag),
-                    '=' => go!(self: to BeforeAttributeValue),
-                    '>' => go!(self: emit_tag Data),
-                    '\0' => go!(self: error; push_name '\u{fffd}'),
+                    '\t' | '\n' | '\x0C' | ' ' => {
+                        go!(self: to AfterAttributeName);
+                    }
+                    '/' => {
+                        go!(self: to SelfClosingStartTag);
+                    }
+                    '=' => {
+                        go!(self: to BeforeAttributeValue);
+                    }
+                    '>' => {
+                        go!(self: emit_tag Data);
+                    }
+                    '\0' => {
+                        go!(self: error);
+                        go!(self: push_name '\u{fffd}');
+                    }
                     c => match lower_ascii_letter(c) {
-                        Some(cl) => go!(self: push_name cl),
+                        Some(cl) => {
+                            go!(self: push_name cl);
+                        }
                         None => {
                             go_match!(self: c,
                             '"' , '\'' , '<' => error);
-                            go!(self: push_name c);
+                            {
+                                go!(self: push_name c);
+                            };
                         }
                     },
                 }
@@ -974,16 +1212,32 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             states::AfterAttributeName => loop {
                 match get_char!(self, input) {
                     '\t' | '\n' | '\x0C' | ' ' => (),
-                    '/' => go!(self: to SelfClosingStartTag),
-                    '=' => go!(self: to BeforeAttributeValue),
-                    '>' => go!(self: emit_tag Data),
-                    '\0' => go!(self: error; create_attr '\u{fffd}'; to AttributeName),
+                    '/' => {
+                        go!(self: to SelfClosingStartTag);
+                    }
+                    '=' => {
+                        go!(self: to BeforeAttributeValue);
+                    }
+                    '>' => {
+                        go!(self: emit_tag Data);
+                    }
+                    '\0' => {
+                        go!(self: error);
+                        go!(self: create_attr '\u{fffd}');
+                        go!(self: to AttributeName);
+                    }
                     c => match lower_ascii_letter(c) {
-                        Some(cl) => go!(self: create_attr cl; to AttributeName),
+                        Some(cl) => {
+                            go!(self: create_attr cl);
+                            go!(self: to AttributeName);
+                        }
                         None => {
                             go_match!(self: c,
                             '"' , '\'' , '<' => error);
-                            go!(self: create_attr c; to AttributeName);
+                            {
+                                go!(self: create_attr c);
+                                go!(self: to AttributeName);
+                            };
                         }
                     },
                 }
@@ -994,36 +1248,75 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             // hopefully in the same zero-copy buffer.
             states::BeforeAttributeValue => loop {
                 match peek!(self, input) {
-                    '\t' | '\n' | '\r' | '\x0C' | ' ' => go!(self: discard_char input),
-                    '"' => go!(self: discard_char input; to AttributeValue DoubleQuoted),
-                    '\'' => go!(self: discard_char input; to AttributeValue SingleQuoted),
+                    '\t' | '\n' | '\r' | '\x0C' | ' ' => {
+                        go!(self: discard_char input);
+                    }
+                    '"' => {
+                        go!(self: discard_char input);
+                        go!(self: to AttributeValue DoubleQuoted);
+                    }
+                    '\'' => {
+                        go!(self: discard_char input);
+                        go!(self: to AttributeValue SingleQuoted);
+                    }
                     '\0' => {
-                        go!(self: discard_char input; error; push_value '\u{fffd}'; to AttributeValue Unquoted)
+                        go!(self: discard_char input);
+                        go!(self: error);
+                        go!(self: push_value '\u{fffd}');
+                        go!(self: to AttributeValue Unquoted);
+                    }
+                    '>' => {
+                        go!(self: discard_char input);
+                        go!(self: error);
+                        go!(self: emit_tag Data);
+                    }
+                    _ => {
+                        go!(self: to AttributeValue Unquoted);
                     }
-                    '>' => go!(self: discard_char input; error; emit_tag Data),
-                    _ => go!(self: to AttributeValue Unquoted),
                 }
             },
 
             //§ attribute-value-(double-quoted)-state
             states::AttributeValue(DoubleQuoted) => loop {
                 match pop_except_from!(self, input, small_char_set!('\r' '"' '&' '\0' '\n')) {
-                    FromSet('"') => go!(self: to AfterAttributeValueQuoted),
-                    FromSet('&') => go!(self: consume_char_ref '"'),
-                    FromSet('\0') => go!(self: error; push_value '\u{fffd}'),
-                    FromSet(c) => go!(self: push_value c),
-                    NotFromSet(ref b) => go!(self: append_value b),
+                    FromSet('"') => {
+                        go!(self: to AfterAttributeValueQuoted);
+                    }
+                    FromSet('&') => {
+                        go!(self: consume_char_ref '"');
+                    }
+                    FromSet('\0') => {
+                        go!(self: error);
+                        go!(self: push_value '\u{fffd}');
+                    }
+                    FromSet(c) => {
+                        go!(self: push_value c);
+                    }
+                    NotFromSet(ref b) => {
+                        go!(self: append_value b);
+                    }
                 }
             },
 
             //§ attribute-value-(single-quoted)-state
             states::AttributeValue(SingleQuoted) => loop {
                 match pop_except_from!(self, input, small_char_set!('\r' '\'' '&' '\0' '\n')) {
-                    FromSet('\'') => go!(self: to AfterAttributeValueQuoted),
-                    FromSet('&') => go!(self: consume_char_ref '\''),
-                    FromSet('\0') => go!(self: error; push_value '\u{fffd}'),
-                    FromSet(c) => go!(self: push_value c),
-                    NotFromSet(ref b) => go!(self: append_value b),
+                    FromSet('\'') => {
+                        go!(self: to AfterAttributeValueQuoted);
+                    }
+                    FromSet('&') => {
+                        go!(self: consume_char_ref '\'');
+                    }
+                    FromSet('\0') => {
+                        go!(self: error);
+                        go!(self: push_value '\u{fffd}');
+                    }
+                    FromSet(c) => {
+                        go!(self: push_value c);
+                    }
+                    NotFromSet(ref b) => {
+                        go!(self: append_value b);
+                    }
                 }
             },
 
@@ -1035,27 +1328,48 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
                     small_char_set!('\r' '\t' '\n' '\x0C' ' ' '&' '>' '\0')
                 ) {
                     FromSet('\t') | FromSet('\n') | FromSet('\x0C') | FromSet(' ') => {
-                        go!(self: to BeforeAttributeName)
+                        go!(self: to BeforeAttributeName);
+                    }
+                    FromSet('&') => {
+                        go!(self: consume_char_ref '>');
+                    }
+                    FromSet('>') => {
+                        go!(self: emit_tag Data);
+                    }
+                    FromSet('\0') => {
+                        go!(self: error);
+                        go!(self: push_value '\u{fffd}');
                     }
-                    FromSet('&') => go!(self: consume_char_ref '>'),
-                    FromSet('>') => go!(self: emit_tag Data),
-                    FromSet('\0') => go!(self: error; push_value '\u{fffd}'),
                     FromSet(c) => {
                         go_match!(self: c,
                             '"' , '\'' , '<' , '=' , '`' => error);
-                        go!(self: push_value c);
+                        {
+                            go!(self: push_value c);
+                        };
+                    }
+                    NotFromSet(ref b) => {
+                        go!(self: append_value b);
                     }
-                    NotFromSet(ref b) => go!(self: append_value b),
                 }
             },
 
             //§ after-attribute-value-(quoted)-state
             states::AfterAttributeValueQuoted => loop {
                 match get_char!(self, input) {
-                    '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName),
-                    '/' => go!(self: to SelfClosingStartTag),
-                    '>' => go!(self: emit_tag Data),
-                    _ => go!(self: error; reconsume BeforeAttributeName),
+                    '\t' | '\n' | '\x0C' | ' ' => {
+                        go!(self: to BeforeAttributeName);
+                    }
+                    '/' => {
+                        go!(self: to SelfClosingStartTag);
+                    }
+                    '>' => {
+                        go!(self: emit_tag Data);
+                    }
+                    _ => {
+                        go!(self: error);
+                        self.reconsume = true;
+                        go!(self: to BeforeAttributeName);
+                    }
                 }
             },
 
@@ -1064,76 +1378,164 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
                 match get_char!(self, input) {
                     '>' => {
                         self.current_tag_self_closing = true;
-                        go!(self: emit_tag Data);
+                        {
+                            go!(self: emit_tag Data);
+                        };
+                    }
+                    _ => {
+                        go!(self: error);
+                        self.reconsume = true;
+                        go!(self: to BeforeAttributeName);
                     }
-                    _ => go!(self: error; reconsume BeforeAttributeName),
                 }
             },
 
             //§ comment-start-state
             states::CommentStart => loop {
                 match get_char!(self, input) {
-                    '-' => go!(self: to CommentStartDash),
-                    '\0' => go!(self: error; push_comment '\u{fffd}'; to Comment),
-                    '>' => go!(self: error; emit_comment; to Data),
-                    c => go!(self: push_comment c; to Comment),
+                    '-' => {
+                        go!(self: to CommentStartDash);
+                    }
+                    '\0' => {
+                        go!(self: error);
+                        go!(self: push_comment '\u{fffd}');
+                        go!(self: to Comment);
+                    }
+                    '>' => {
+                        go!(self: error);
+                        go!(self: emit_comment);
+                        go!(self: to Data);
+                    }
+                    c => {
+                        go!(self: push_comment c);
+                        go!(self: to Comment);
+                    }
                 }
             },
 
             //§ comment-start-dash-state
             states::CommentStartDash => loop {
                 match get_char!(self, input) {
-                    '-' => go!(self: to CommentEnd),
-                    '\0' => go!(self: error; append_comment "-\u{fffd}"; to Comment),
-                    '>' => go!(self: error; emit_comment; to Data),
-                    c => go!(self: push_comment '-'; push_comment c; to Comment),
+                    '-' => {
+                        go!(self: to CommentEnd);
+                    }
+                    '\0' => {
+                        go!(self: error);
+                        go!(self: append_comment "-\u{fffd}");
+                        go!(self: to Comment);
+                    }
+                    '>' => {
+                        go!(self: error);
+                        go!(self: emit_comment);
+                        go!(self: to Data);
+                    }
+                    c => {
+                        go!(self: push_comment '-');
+                        go!(self: push_comment c);
+                        go!(self: to Comment);
+                    }
                 }
             },
 
             //§ comment-state
             states::Comment => loop {
                 match get_char!(self, input) {
-                    '-' => go!(self: to CommentEndDash),
-                    '\0' => go!(self: error; push_comment '\u{fffd}'),
-                    c => go!(self: push_comment c),
+                    '-' => {
+                        go!(self: to CommentEndDash);
+                    }
+                    '\0' => {
+                        go!(self: error);
+                        go!(self: push_comment '\u{fffd}');
+                    }
+                    c => {
+                        go!(self: push_comment c);
+                    }
                 }
             },
 
             //§ comment-end-dash-state
             states::CommentEndDash => loop {
                 match get_char!(self, input) {
-                    '-' => go!(self: to CommentEnd),
-                    '\0' => go!(self: error; append_comment "-\u{fffd}"; to Comment),
-                    c => go!(self: push_comment '-'; push_comment c; to Comment),
+                    '-' => {
+                        go!(self: to CommentEnd);
+                    }
+                    '\0' => {
+                        go!(self: error);
+                        go!(self: append_comment "-\u{fffd}");
+                        go!(self: to Comment);
+                    }
+                    c => {
+                        go!(self: push_comment '-');
+                        go!(self: push_comment c);
+                        go!(self: to Comment);
+                    }
                 }
             },
 
             //§ comment-end-state
             states::CommentEnd => loop {
                 match get_char!(self, input) {
-                    '>' => go!(self: emit_comment; to Data),
-                    '\0' => go!(self: error; append_comment "--\u{fffd}"; to Comment),
-                    '!' => go!(self: error; to CommentEndBang),
-                    '-' => go!(self: error; push_comment '-'),
-                    c => go!(self: error; append_comment "--"; push_comment c; to Comment),
+                    '>' => {
+                        go!(self: emit_comment);
+                        go!(self: to Data);
+                    }
+                    '\0' => {
+                        go!(self: error);
+                        go!(self: append_comment "--\u{fffd}");
+                        go!(self: to Comment);
+                    }
+                    '!' => {
+                        go!(self: error);
+                        go!(self: to CommentEndBang);
+                    }
+                    '-' => {
+                        go!(self: error);
+                        go!(self: push_comment '-');
+                    }
+                    c => {
+                        go!(self: error);
+                        go!(self: append_comment "--");
+                        go!(self: push_comment c);
+                        go!(self: to Comment);
+                    }
                 }
             },
 
             //§ comment-end-bang-state
             states::CommentEndBang => loop {
                 match get_char!(self, input) {
-                    '-' => go!(self: append_comment "--!"; to CommentEndDash),
-                    '>' => go!(self: emit_comment; to Data),
-                    '\0' => go!(self: error; append_comment "--!\u{fffd}"; to Comment),
-                    c => go!(self: append_comment "--!"; push_comment c; to Comment),
+                    '-' => {
+                        go!(self: append_comment "--!");
+                        go!(self: to CommentEndDash);
+                    }
+                    '>' => {
+                        go!(self: emit_comment);
+                        go!(self: to Data);
+                    }
+                    '\0' => {
+                        go!(self: error);
+                        go!(self: append_comment "--!\u{fffd}");
+                        go!(self: to Comment);
+                    }
+                    c => {
+                        go!(self: append_comment "--!");
+                        go!(self: push_comment c);
+                        go!(self: to Comment);
+                    }
                 }
             },
 
             //§ doctype-state
             states::Doctype => loop {
                 match get_char!(self, input) {
-                    '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeName),
-                    _ => go!(self: error; reconsume BeforeDoctypeName),
+                    '\t' | '\n' | '\x0C' | ' ' => {
+                        go!(self: to BeforeDoctypeName);
+                    }
+                    _ => {
+                        go!(self: error);
+                        self.reconsume = true;
+                        go!(self: to BeforeDoctypeName);
+                    }
                 }
             },
 
@@ -1142,11 +1544,22 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
                 match get_char!(self, input) {
                     '\t' | '\n' | '\x0C' | ' ' => (),
                     '\0' => {
-                        go!(self: error; create_doctype; push_doctype_name '\u{fffd}'; to DoctypeName)
+                        go!(self: error);
+                        go!(self: create_doctype);
+                        go!(self: push_doctype_name '\u{fffd}');
+                        go!(self: to DoctypeName);
+                    }
+                    '>' => {
+                        go!(self: error);
+                        go!(self: create_doctype);
+                        go!(self: force_quirks);
+                        go!(self: emit_doctype);
+                        go!(self: to Data);
                     }
-                    '>' => go!(self: error; create_doctype; force_quirks; emit_doctype; to Data),
                     c => {
-                        go!(self: create_doctype; push_doctype_name (c.to_ascii_lowercase()); to DoctypeName)
+                        go!(self: create_doctype);
+                        go!(self: push_doctype_name (c.to_ascii_lowercase()));
+                        go!(self: to DoctypeName);
                     }
                 }
             },
@@ -1154,24 +1567,46 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ doctype-name-state
             states::DoctypeName => loop {
                 match get_char!(self, input) {
-                    '\t' | '\n' | '\x0C' | ' ' => go!(self: clear_temp; to AfterDoctypeName),
-                    '>' => go!(self: emit_doctype; to Data),
-                    '\0' => go!(self: error; push_doctype_name '\u{fffd}'),
-                    c => go!(self: push_doctype_name (c.to_ascii_lowercase())),
+                    '\t' | '\n' | '\x0C' | ' ' => {
+                        go!(self: clear_temp);
+                        go!(self: to AfterDoctypeName);
+                    }
+                    '>' => {
+                        go!(self: emit_doctype);
+                        go!(self: to Data);
+                    }
+                    '\0' => {
+                        go!(self: error);
+                        go!(self: push_doctype_name '\u{fffd}');
+                    }
+                    c => {
+                        go!(self: push_doctype_name (c.to_ascii_lowercase()));
+                    }
                 }
             },
 
             //§ after-doctype-name-state
             states::AfterDoctypeName => loop {
                 if eat!(self, input, "public") {
-                    go!(self: to AfterDoctypeKeyword Public);
+                    {
+                        go!(self: to AfterDoctypeKeyword Public);
+                    };
                 } else if eat!(self, input, "system") {
-                    go!(self: to AfterDoctypeKeyword System);
+                    {
+                        go!(self: to AfterDoctypeKeyword System);
+                    };
                 } else {
                     match get_char!(self, input) {
                         '\t' | '\n' | '\x0C' | ' ' => (),
-                        '>' => go!(self: emit_doctype; to Data),
-                        _ => go!(self: error; force_quirks; to BogusDoctype),
+                        '>' => {
+                            go!(self: emit_doctype);
+                            go!(self: to Data);
+                        }
+                        _ => {
+                            go!(self: error);
+                            go!(self: force_quirks);
+                            go!(self: to BogusDoctype);
+                        }
                     }
                 }
             },
@@ -1179,15 +1614,30 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ after-doctype-public-keyword-state after-doctype-system-keyword-state
             states::AfterDoctypeKeyword(kind) => loop {
                 match get_char!(self, input) {
-                    '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeIdentifier kind),
+                    '\t' | '\n' | '\x0C' | ' ' => {
+                        go!(self: to BeforeDoctypeIdentifier kind);
+                    }
                     '"' => {
-                        go!(self: error; clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind)
+                        go!(self: error);
+                        go!(self: clear_doctype_id kind);
+                        go!(self: to DoctypeIdentifierDoubleQuoted kind);
                     }
                     '\'' => {
-                        go!(self: error; clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind)
+                        go!(self: error);
+                        go!(self: clear_doctype_id kind);
+                        go!(self: to DoctypeIdentifierSingleQuoted kind);
+                    }
+                    '>' => {
+                        go!(self: error);
+                        go!(self: force_quirks);
+                        go!(self: emit_doctype);
+                        go!(self: to Data);
+                    }
+                    _ => {
+                        go!(self: error);
+                        go!(self: force_quirks);
+                        go!(self: to BogusDoctype);
                     }
-                    '>' => go!(self: error; force_quirks; emit_doctype; to Data),
-                    _ => go!(self: error; force_quirks; to BogusDoctype),
                 }
             },
 
@@ -1195,30 +1645,69 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             states::BeforeDoctypeIdentifier(kind) => loop {
                 match get_char!(self, input) {
                     '\t' | '\n' | '\x0C' | ' ' => (),
-                    '"' => go!(self: clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind),
-                    '\'' => go!(self: clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind),
-                    '>' => go!(self: error; force_quirks; emit_doctype; to Data),
-                    _ => go!(self: error; force_quirks; to BogusDoctype),
+                    '"' => {
+                        go!(self: clear_doctype_id kind);
+                        go!(self: to DoctypeIdentifierDoubleQuoted kind);
+                    }
+                    '\'' => {
+                        go!(self: clear_doctype_id kind);
+                        go!(self: to DoctypeIdentifierSingleQuoted kind);
+                    }
+                    '>' => {
+                        go!(self: error);
+                        go!(self: force_quirks);
+                        go!(self: emit_doctype);
+                        go!(self: to Data);
+                    }
+                    _ => {
+                        go!(self: error);
+                        go!(self: force_quirks);
+                        go!(self: to BogusDoctype);
+                    }
                 }
             },
 
             //§ doctype-public-identifier-(double-quoted)-state doctype-system-identifier-(double-quoted)-state
             states::DoctypeIdentifierDoubleQuoted(kind) => loop {
                 match get_char!(self, input) {
-                    '"' => go!(self: to AfterDoctypeIdentifier kind),
-                    '\0' => go!(self: error; push_doctype_id kind '\u{fffd}'),
-                    '>' => go!(self: error; force_quirks; emit_doctype; to Data),
-                    c => go!(self: push_doctype_id kind c),
+                    '"' => {
+                        go!(self: to AfterDoctypeIdentifier kind);
+                    }
+                    '\0' => {
+                        go!(self: error);
+                        go!(self: push_doctype_id kind '\u{fffd}');
+                    }
+                    '>' => {
+                        go!(self: error);
+                        go!(self: force_quirks);
+                        go!(self: emit_doctype);
+                        go!(self: to Data);
+                    }
+                    c => {
+                        go!(self: push_doctype_id kind c);
+                    }
                 }
             },
 
             //§ doctype-public-identifier-(single-quoted)-state doctype-system-identifier-(single-quoted)-state
             states::DoctypeIdentifierSingleQuoted(kind) => loop {
                 match get_char!(self, input) {
-                    '\'' => go!(self: to AfterDoctypeIdentifier kind),
-                    '\0' => go!(self: error; push_doctype_id kind '\u{fffd}'),
-                    '>' => go!(self: error; force_quirks; emit_doctype; to Data),
-                    c => go!(self: push_doctype_id kind c),
+                    '\'' => {
+                        go!(self: to AfterDoctypeIdentifier kind);
+                    }
+                    '\0' => {
+                        go!(self: error);
+                        go!(self: push_doctype_id kind '\u{fffd}');
+                    }
+                    '>' => {
+                        go!(self: error);
+                        go!(self: force_quirks);
+                        go!(self: emit_doctype);
+                        go!(self: to Data);
+                    }
+                    c => {
+                        go!(self: push_doctype_id kind c);
+                    }
                 }
             },
 
@@ -1226,16 +1715,27 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             states::AfterDoctypeIdentifier(Public) => loop {
                 match get_char!(self, input) {
                     '\t' | '\n' | '\x0C' | ' ' => {
-                        go!(self: to BetweenDoctypePublicAndSystemIdentifiers)
+                        go!(self: to BetweenDoctypePublicAndSystemIdentifiers);
+                    }
+                    '>' => {
+                        go!(self: emit_doctype);
+                        go!(self: to Data);
                     }
-                    '>' => go!(self: emit_doctype; to Data),
                     '"' => {
-                        go!(self: error; clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System)
+                        go!(self: error);
+                        go!(self: clear_doctype_id System);
+                        go!(self: to DoctypeIdentifierDoubleQuoted System);
                     }
                     '\'' => {
-                        go!(self: error; clear_doctype_id System; to DoctypeIdentifierSingleQuoted System)
+                        go!(self: error);
+                        go!(self: clear_doctype_id System);
+                        go!(self: to DoctypeIdentifierSingleQuoted System);
+                    }
+                    _ => {
+                        go!(self: error);
+                        go!(self: force_quirks);
+                        go!(self: to BogusDoctype);
                     }
-                    _ => go!(self: error; force_quirks; to BogusDoctype),
                 }
             },
 
@@ -1243,8 +1743,14 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             states::AfterDoctypeIdentifier(System) => loop {
                 match get_char!(self, input) {
                     '\t' | '\n' | '\x0C' | ' ' => (),
-                    '>' => go!(self: emit_doctype; to Data),
-                    _ => go!(self: error; to BogusDoctype),
+                    '>' => {
+                        go!(self: emit_doctype);
+                        go!(self: to Data);
+                    }
+                    _ => {
+                        go!(self: error);
+                        go!(self: to BogusDoctype);
+                    }
                 }
             },
 
@@ -1252,21 +1758,33 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             states::BetweenDoctypePublicAndSystemIdentifiers => loop {
                 match get_char!(self, input) {
                     '\t' | '\n' | '\x0C' | ' ' => (),
-                    '>' => go!(self: emit_doctype; to Data),
+                    '>' => {
+                        go!(self: emit_doctype);
+                        go!(self: to Data);
+                    }
                     '"' => {
-                        go!(self: clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System)
+                        go!(self: clear_doctype_id System);
+                        go!(self: to DoctypeIdentifierDoubleQuoted System);
                     }
                     '\'' => {
-                        go!(self: clear_doctype_id System; to DoctypeIdentifierSingleQuoted System)
+                        go!(self: clear_doctype_id System);
+                        go!(self: to DoctypeIdentifierSingleQuoted System);
+                    }
+                    _ => {
+                        go!(self: error);
+                        go!(self: force_quirks);
+                        go!(self: to BogusDoctype);
                     }
-                    _ => go!(self: error; force_quirks; to BogusDoctype),
                 }
             },
 
             //§ bogus-doctype-state
             states::BogusDoctype => loop {
                 match get_char!(self, input) {
-                    '>' => go!(self: emit_doctype; to Data),
+                    '>' => {
+                        go!(self: emit_doctype);
+                        go!(self: to Data);
+                    }
                     _ => (),
                 }
             },
@@ -1274,52 +1792,93 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             //§ bogus-comment-state
             states::BogusComment => loop {
                 match get_char!(self, input) {
-                    '>' => go!(self: emit_comment; to Data),
-                    '\0' => go!(self: push_comment '\u{fffd}'),
-                    c => go!(self: push_comment c),
+                    '>' => {
+                        go!(self: emit_comment);
+                        go!(self: to Data);
+                    }
+                    '\0' => {
+                        go!(self: push_comment '\u{fffd}');
+                    }
+                    c => {
+                        go!(self: push_comment c);
+                    }
                 }
             },
 
             //§ markup-declaration-open-state
             states::MarkupDeclarationOpen => loop {
                 if eat_exact!(self, input, "--") {
-                    go!(self: clear_comment; to CommentStart);
+                    {
+                        go!(self: clear_comment);
+                        go!(self: to CommentStart);
+                    };
                 } else if eat!(self, input, "doctype") {
-                    go!(self: to Doctype);
+                    {
+                        go!(self: to Doctype);
+                    };
                 } else {
                     if self
                         .sink
                         .adjusted_current_node_present_but_not_in_html_namespace()
                     {
                         if eat_exact!(self, input, "[CDATA[") {
-                            go!(self: clear_temp; to CdataSection);
+                            {
+                                go!(self: clear_temp);
+                                go!(self: to CdataSection);
+                            };
                         }
                     }
-                    go!(self: error; to BogusComment);
+                    {
+                        go!(self: error);
+                        go!(self: to BogusComment);
+                    };
                 }
             },
 
             //§ cdata-section-state
             states::CdataSection => loop {
                 match get_char!(self, input) {
-                    ']' => go!(self: to CdataSectionBracket),
-                    '\0' => go!(self: emit_temp; emit '\0'),
-                    c => go!(self: push_temp c),
+                    ']' => {
+                        go!(self: to CdataSectionBracket);
+                    }
+                    '\0' => {
+                        go!(self: emit_temp);
+                        go!(self: emit '\0');
+                    }
+                    c => {
+                        go!(self: push_temp c);
+                    }
                 }
             },
 
             //§ cdata-section-bracket
             states::CdataSectionBracket => match get_char!(self, input) {
-                ']' => go!(self: to CdataSectionEnd),
-                _ => go!(self: push_temp ']'; reconsume CdataSection),
+                ']' => {
+                    go!(self: to CdataSectionEnd);
+                }
+                _ => {
+                    go!(self: push_temp ']');
+                    self.reconsume = true;
+                    go!(self: to CdataSection);
+                }
             },
 
             //§ cdata-section-end
             states::CdataSectionEnd => loop {
                 match get_char!(self, input) {
-                    ']' => go!(self: push_temp ']'),
-                    '>' => go!(self: emit_temp; to Data),
-                    _ => go!(self: push_temp ']'; push_temp ']'; reconsume CdataSection),
+                    ']' => {
+                        go!(self: push_temp ']');
+                    }
+                    '>' => {
+                        go!(self: emit_temp);
+                        go!(self: to Data);
+                    }
+                    _ => {
+                        go!(self: push_temp ']');
+                        go!(self: push_temp ']');
+                        self.reconsume = true;
+                        go!(self: to CdataSection);
+                    }
                 }
             },
             //§ END
@@ -1360,9 +1919,13 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
         for i in 0..num_chars {
             let c = chars[i as usize];
             match self.state {
-                states::Data | states::RawData(states::Rcdata) => go!(self: emit c),
+                states::Data | states::RawData(states::Rcdata) => {
+                    go!(self: emit c);
+                }
 
-                states::AttributeValue(_) => go!(self: push_value c),
+                states::AttributeValue(_) => {
+                    go!(self: push_value c);
+                }
 
                 _ => panic!(
                     "state {:?} should not be reachable in process_char_ref",
@@ -1446,30 +2009,56 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             | states::AfterAttributeValueQuoted
             | states::SelfClosingStartTag
             | states::ScriptDataEscapedDash(_)
-            | states::ScriptDataEscapedDashDash(_) => go!(self: error_eof; to Data),
+            | states::ScriptDataEscapedDashDash(_) => {
+                go!(self: error_eof);
+                go!(self: to Data);
+            }
 
-            states::TagOpen => go!(self: error_eof; emit '<'; to Data),
+            states::TagOpen => {
+                go!(self: error_eof);
+                go!(self: emit '<');
+                go!(self: to Data);
+            }
 
-            states::EndTagOpen => go!(self: error_eof; emit '<'; emit '/'; to Data),
+            states::EndTagOpen => {
+                go!(self: error_eof);
+                go!(self: emit '<');
+                go!(self: emit '/');
+                go!(self: to Data);
+            }
 
             states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => {
-                go!(self: to RawData ScriptDataEscaped DoubleEscaped)
+                go!(self: to RawData ScriptDataEscaped DoubleEscaped);
             }
 
-            states::RawLessThanSign(kind) => go!(self: emit '<'; to RawData kind),
+            states::RawLessThanSign(kind) => {
+                go!(self: emit '<');
+                go!(self: to RawData kind);
+            }
 
-            states::RawEndTagOpen(kind) => go!(self: emit '<'; emit '/'; to RawData kind),
+            states::RawEndTagOpen(kind) => {
+                go!(self: emit '<');
+                go!(self: emit '/');
+                go!(self: to RawData kind);
+            }
 
             states::RawEndTagName(kind) => {
-                go!(self: emit '<'; emit '/'; emit_temp; to RawData kind)
+                go!(self: emit '<');
+                go!(self: emit '/');
+                go!(self: emit_temp);
+                go!(self: to RawData kind);
             }
 
-            states::ScriptDataEscapeStart(kind) => go!(self: to RawData ScriptDataEscaped kind),
+            states::ScriptDataEscapeStart(kind) => {
+                go!(self: to RawData ScriptDataEscaped kind);
+            }
 
-            states::ScriptDataEscapeStartDash => go!(self: to RawData ScriptData),
+            states::ScriptDataEscapeStartDash => {
+                go!(self: to RawData ScriptData);
+            }
 
             states::ScriptDataDoubleEscapeEnd => {
-                go!(self: to RawData ScriptDataEscaped DoubleEscaped)
+                go!(self: to RawData ScriptDataEscaped DoubleEscaped);
             }
 
             states::CommentStart
@@ -1477,10 +2066,18 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             | states::Comment
             | states::CommentEndDash
             | states::CommentEnd
-            | states::CommentEndBang => go!(self: error_eof; emit_comment; to Data),
+            | states::CommentEndBang => {
+                go!(self: error_eof);
+                go!(self: emit_comment);
+                go!(self: to Data);
+            }
 
             states::Doctype | states::BeforeDoctypeName => {
-                go!(self: error_eof; create_doctype; force_quirks; emit_doctype; to Data)
+                go!(self: error_eof);
+                go!(self: create_doctype);
+                go!(self: force_quirks);
+                go!(self: emit_doctype);
+                go!(self: to Data);
             }
 
             states::DoctypeName
@@ -1491,20 +2088,43 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
             | states::DoctypeIdentifierSingleQuoted(_)
             | states::AfterDoctypeIdentifier(_)
             | states::BetweenDoctypePublicAndSystemIdentifiers => {
-                go!(self: error_eof; force_quirks; emit_doctype; to Data)
+                go!(self: error_eof);
+                go!(self: force_quirks);
+                go!(self: emit_doctype);
+                go!(self: to Data);
             }
 
-            states::BogusDoctype => go!(self: emit_doctype; to Data),
+            states::BogusDoctype => {
+                go!(self: emit_doctype);
+                go!(self: to Data);
+            }
 
-            states::BogusComment => go!(self: emit_comment; to Data),
+            states::BogusComment => {
+                go!(self: emit_comment);
+                go!(self: to Data);
+            }
 
-            states::MarkupDeclarationOpen => go!(self: error; to BogusComment),
+            states::MarkupDeclarationOpen => {
+                go!(self: error);
+                go!(self: to BogusComment);
+            }
 
-            states::CdataSection => go!(self: emit_temp; error_eof; to Data),
+            states::CdataSection => {
+                go!(self: emit_temp);
+                go!(self: error_eof);
+                go!(self: to Data);
+            }
 
-            states::CdataSectionBracket => go!(self: push_temp ']'; to CdataSection),
+            states::CdataSectionBracket => {
+                go!(self: push_temp ']');
+                go!(self: to CdataSection);
+            }
 
-            states::CdataSectionEnd => go!(self: push_temp ']'; push_temp ']'; to CdataSection),
+            states::CdataSectionEnd => {
+                go!(self: push_temp ']');
+                go!(self: push_temp ']');
+                go!(self: to CdataSection);
+            }
         }
     }
 }
author	Martin Fischer <martin@push-f.com>	2021-11-28 10:07:00 +0100
committer	Martin Fischer <martin@push-f.com>	2021-11-29 15:11:01 +0100
commit	2b84e92f4f0981b099f30759330d00a73c90dee8 (patch)
tree	31d39077aa50944ff9bbdd784f316b5811d9c31c
parent	8c3be2ac471edc6f24059d552e82fb5acc3d7cc2 (diff)