diff options
| author | Martin Fischer <martin@push-f.com> | 2021-11-28 10:07:00 +0100 | 
|---|---|---|
| committer | Martin Fischer <martin@push-f.com> | 2021-11-29 15:11:01 +0100 | 
| commit | 2b84e92f4f0981b099f30759330d00a73c90dee8 (patch) | |
| tree | 31d39077aa50944ff9bbdd784f316b5811d9c31c /src/tokenizer/mod.rs | |
| parent | 8c3be2ac471edc6f24059d552e82fb5acc3d7cc2 (diff) | |
refactor: split up go! macro calls
#!/usr/bin/env python3
import re
def split_go(match):
    """Expand one multi-command ``go!(self: a; b; c)`` call into a braced block.

    ``match`` is a regex match whose first group holds the text between
    ``go!(self:`` and the closing parenthesis. That text is split on ``;`` and
    every piece is emitted as its own ``go!(self:...);`` statement. A piece
    that starts with `` reconsume `` is emitted as ``self.reconsume = true;``
    followed by a ``to`` command carrying the same arguments.

    Returns the replacement text, wrapped in ``{`` and ``}``.
    """
    pieces = ['{']
    for command in match.group(1).split(';'):
        if command.startswith(' reconsume '):
            pieces.append('self.reconsume = true;')
            # Only the leading keyword is swapped; the target state that
            # follows it is carried over unchanged.
            command = command.replace(' reconsume', ' to', 1)
        pieces.append('go!(self:{});'.format(command))
    pieces.append('}')
    return ''.join(pieces)
# Rewrite every `go!(self: ...)` call in the tokenizer source into one call
# per command, then let rustfmt lay the generated one-line blocks out.
#
# Raw string: `\(` is not a valid Python string escape, so the non-raw form
# of this pattern triggers an invalid-escape warning on current Python.
GO_CALL = re.compile(r'go!\(self:(.*)\)')
rewritten = []
# The tokenizer source contains non-ASCII characters (e.g. the `§` markers),
# so pin the encoding instead of depending on the locale default.
with open('src/tokenizer/mod.rs', encoding='utf-8') as f:
    for line in f:
        if '$me:ident' in line:
            # skip macro rules: these lines are copied through untouched
            rewritten.append(line)
        else:
            rewritten.append(GO_CALL.sub(split_go, line))
text = ''.join(rewritten)
with open('src/tokenizer/mod.rs', 'w', encoding='utf-8') as f:
    f.write(text)
import subprocess
# Surface a formatter failure instead of silently discarding its exit status.
subprocess.check_call(['cargo', 'fmt'])
Diffstat (limited to 'src/tokenizer/mod.rs')
| -rw-r--r-- | src/tokenizer/mod.rs | 1062 | 
1 file changed, 841 insertions, 221 deletions
| diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index a8ec39f..fc8bd7f 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -676,10 +676,19 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ data-state              states::Data => loop {                  match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n')) { -                    FromSet('\0') => go!(self: error; emit '\0'), -                    FromSet('&') => go!(self: consume_char_ref), -                    FromSet('<') => go!(self: to TagOpen), -                    FromSet(c) => go!(self: emit c), +                    FromSet('\0') => { +                        go!(self: error); +                        go!(self: emit '\0'); +                    } +                    FromSet('&') => { +                        go!(self: consume_char_ref); +                    } +                    FromSet('<') => { +                        go!(self: to TagOpen); +                    } +                    FromSet(c) => { +                        go!(self: emit c); +                    }                      NotFromSet(b) => self.emit_chars(b),                  }              }, @@ -687,10 +696,19 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ rcdata-state              states::RawData(Rcdata) => loop {                  match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n')) { -                    FromSet('\0') => go!(self: error; emit '\u{fffd}'), -                    FromSet('&') => go!(self: consume_char_ref), -                    FromSet('<') => go!(self: to RawLessThanSign Rcdata), -                    FromSet(c) => go!(self: emit c), +                    FromSet('\0') => { +                        go!(self: error); +                        go!(self: emit '\u{fffd}'); +                    } +                    FromSet('&') => { +                        go!(self: consume_char_ref); +                    } +                    FromSet('<') 
=> { +                        go!(self: to RawLessThanSign Rcdata); +                    } +                    FromSet(c) => { +                        go!(self: emit c); +                    }                      NotFromSet(b) => self.emit_chars(b),                  }              }, @@ -698,9 +716,16 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ rawtext-state              states::RawData(Rawtext) => loop {                  match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n')) { -                    FromSet('\0') => go!(self: error; emit '\u{fffd}'), -                    FromSet('<') => go!(self: to RawLessThanSign Rawtext), -                    FromSet(c) => go!(self: emit c), +                    FromSet('\0') => { +                        go!(self: error); +                        go!(self: emit '\u{fffd}'); +                    } +                    FromSet('<') => { +                        go!(self: to RawLessThanSign Rawtext); +                    } +                    FromSet(c) => { +                        go!(self: emit c); +                    }                      NotFromSet(b) => self.emit_chars(b),                  }              }, @@ -708,9 +733,16 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ script-data-state              states::RawData(ScriptData) => loop {                  match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n')) { -                    FromSet('\0') => go!(self: error; emit '\u{fffd}'), -                    FromSet('<') => go!(self: to RawLessThanSign ScriptData), -                    FromSet(c) => go!(self: emit c), +                    FromSet('\0') => { +                        go!(self: error); +                        go!(self: emit '\u{fffd}'); +                    } +                    FromSet('<') => { +                        go!(self: to RawLessThanSign ScriptData); +                    } +                    FromSet(c) => { +                        
go!(self: emit c); +                    }                      NotFromSet(b) => self.emit_chars(b),                  }              }, @@ -718,10 +750,20 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ script-data-escaped-state              states::RawData(ScriptDataEscaped(Escaped)) => loop {                  match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n')) { -                    FromSet('\0') => go!(self: error; emit '\u{fffd}'), -                    FromSet('-') => go!(self: emit '-'; to ScriptDataEscapedDash Escaped), -                    FromSet('<') => go!(self: to RawLessThanSign ScriptDataEscaped Escaped), -                    FromSet(c) => go!(self: emit c), +                    FromSet('\0') => { +                        go!(self: error); +                        go!(self: emit '\u{fffd}'); +                    } +                    FromSet('-') => { +                        go!(self: emit '-'); +                        go!(self: to ScriptDataEscapedDash Escaped); +                    } +                    FromSet('<') => { +                        go!(self: to RawLessThanSign ScriptDataEscaped Escaped); +                    } +                    FromSet(c) => { +                        go!(self: emit c); +                    }                      NotFromSet(b) => self.emit_chars(b),                  }              }, @@ -729,12 +771,21 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ script-data-double-escaped-state              states::RawData(ScriptDataEscaped(DoubleEscaped)) => loop {                  match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n')) { -                    FromSet('\0') => go!(self: error; emit '\u{fffd}'), -                    FromSet('-') => go!(self: emit '-'; to ScriptDataEscapedDash DoubleEscaped), +                    FromSet('\0') => { +                        go!(self: error); +                        go!(self: emit '\u{fffd}'); +                 
   } +                    FromSet('-') => { +                        go!(self: emit '-'); +                        go!(self: to ScriptDataEscapedDash DoubleEscaped); +                    }                      FromSet('<') => { -                        go!(self: emit '<'; to RawLessThanSign ScriptDataEscaped DoubleEscaped) +                        go!(self: emit '<'); +                        go!(self: to RawLessThanSign ScriptDataEscaped DoubleEscaped); +                    } +                    FromSet(c) => { +                        go!(self: emit c);                      } -                    FromSet(c) => go!(self: emit c),                      NotFromSet(b) => self.emit_chars(b),                  }              }, @@ -742,8 +793,13 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ plaintext-state              states::Plaintext => loop {                  match pop_except_from!(self, input, small_char_set!('\r' '\0' '\n')) { -                    FromSet('\0') => go!(self: error; emit '\u{fffd}'), -                    FromSet(c) => go!(self: emit c), +                    FromSet('\0') => { +                        go!(self: error); +                        go!(self: emit '\u{fffd}'); +                    } +                    FromSet(c) => { +                        go!(self: emit c); +                    }                      NotFromSet(b) => self.emit_chars(b),                  }              }, @@ -751,12 +807,30 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ tag-open-state              states::TagOpen => loop {                  match get_char!(self, input) { -                    '!' => go!(self: clear_temp; to MarkupDeclarationOpen), -                    '/' => go!(self: to EndTagOpen), -                    '?' => go!(self: error; clear_comment; push_comment '?'; to BogusComment), +                    '!' 
=> { +                        go!(self: clear_temp); +                        go!(self: to MarkupDeclarationOpen); +                    } +                    '/' => { +                        go!(self: to EndTagOpen); +                    } +                    '?' => { +                        go!(self: error); +                        go!(self: clear_comment); +                        go!(self: push_comment '?'); +                        go!(self: to BogusComment); +                    }                      c => match lower_ascii_letter(c) { -                        Some(cl) => go!(self: create_tag StartTag cl; to TagName), -                        None => go!(self: error; emit '<'; reconsume Data), +                        Some(cl) => { +                            go!(self: create_tag StartTag cl); +                            go!(self: to TagName); +                        } +                        None => { +                            go!(self: error); +                            go!(self: emit '<'); +                            self.reconsume = true; +                            go!(self: to Data); +                        }                      },                  }              }, @@ -764,13 +838,27 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ end-tag-open-state              states::EndTagOpen => loop {                  match get_char!(self, input) { -                    '>' => go!(self: error; to Data), +                    '>' => { +                        go!(self: error); +                        go!(self: to Data); +                    }                      '\0' => { -                        go!(self: error; clear_comment; push_comment '\u{fffd}'; to BogusComment) +                        go!(self: error); +                        go!(self: clear_comment); +                        go!(self: push_comment '\u{fffd}'); +                        go!(self: to BogusComment);                      }                      c => match 
lower_ascii_letter(c) { -                        Some(cl) => go!(self: create_tag EndTag cl; to TagName), -                        None => go!(self: error; clear_comment; push_comment c; to BogusComment), +                        Some(cl) => { +                            go!(self: create_tag EndTag cl); +                            go!(self: to TagName); +                        } +                        None => { +                            go!(self: error); +                            go!(self: clear_comment); +                            go!(self: push_comment c); +                            go!(self: to BogusComment); +                        }                      },                  }              }, @@ -778,23 +866,45 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ tag-name-state              states::TagName => loop {                  match get_char!(self, input) { -                    '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName), -                    '/' => go!(self: to SelfClosingStartTag), -                    '>' => go!(self: emit_tag Data), -                    '\0' => go!(self: error; push_tag '\u{fffd}'), -                    c => go!(self: push_tag (c.to_ascii_lowercase())), +                    '\t' | '\n' | '\x0C' | ' ' => { +                        go!(self: to BeforeAttributeName); +                    } +                    '/' => { +                        go!(self: to SelfClosingStartTag); +                    } +                    '>' => { +                        go!(self: emit_tag Data); +                    } +                    '\0' => { +                        go!(self: error); +                        go!(self: push_tag '\u{fffd}'); +                    } +                    c => { +                        go!(self: push_tag (c.to_ascii_lowercase())); +                    }                  }              },              //§ script-data-escaped-less-than-sign-state              
states::RawLessThanSign(ScriptDataEscaped(Escaped)) => loop {                  match get_char!(self, input) { -                    '/' => go!(self: clear_temp; to RawEndTagOpen ScriptDataEscaped Escaped), +                    '/' => { +                        go!(self: clear_temp); +                        go!(self: to RawEndTagOpen ScriptDataEscaped Escaped); +                    }                      c => match lower_ascii_letter(c) {                          Some(cl) => { -                            go!(self: clear_temp; push_temp cl; emit '<'; emit c; to ScriptDataEscapeStart DoubleEscaped) +                            go!(self: clear_temp); +                            go!(self: push_temp cl); +                            go!(self: emit '<'); +                            go!(self: emit c); +                            go!(self: to ScriptDataEscapeStart DoubleEscaped); +                        } +                        None => { +                            go!(self: emit '<'); +                            self.reconsume = true; +                            go!(self: to RawData ScriptDataEscaped Escaped);                          } -                        None => go!(self: emit '<'; reconsume RawData ScriptDataEscaped Escaped),                      },                  }              }, @@ -802,8 +912,15 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ script-data-double-escaped-less-than-sign-state              states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => loop {                  match get_char!(self, input) { -                    '/' => go!(self: clear_temp; emit '/'; to ScriptDataDoubleEscapeEnd), -                    _ => go!(self: reconsume RawData ScriptDataEscaped DoubleEscaped), +                    '/' => { +                        go!(self: clear_temp); +                        go!(self: emit '/'); +                        go!(self: to ScriptDataDoubleEscapeEnd); +                    } +                    _ => { +         
               self.reconsume = true; +                        go!(self: to RawData ScriptDataEscaped DoubleEscaped); +                    }                  }              }, @@ -811,11 +928,20 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              // otherwise              states::RawLessThanSign(kind) => loop {                  match get_char!(self, input) { -                    '/' => go!(self: clear_temp; to RawEndTagOpen kind), +                    '/' => { +                        go!(self: clear_temp); +                        go!(self: to RawEndTagOpen kind); +                    }                      '!' if kind == ScriptData => { -                        go!(self: emit '<'; emit '!'; to ScriptDataEscapeStart Escaped) +                        go!(self: emit '<'); +                        go!(self: emit '!'); +                        go!(self: to ScriptDataEscapeStart Escaped); +                    } +                    _ => { +                        go!(self: emit '<'); +                        self.reconsume = true; +                        go!(self: to RawData kind);                      } -                    _ => go!(self: emit '<'; reconsume RawData kind),                  }              }, @@ -823,8 +949,17 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              states::RawEndTagOpen(kind) => loop {                  let c = get_char!(self, input);                  match lower_ascii_letter(c) { -                    Some(cl) => go!(self: create_tag EndTag cl; push_temp c; to RawEndTagName kind), -                    None => go!(self: emit '<'; emit '/'; reconsume RawData kind), +                    Some(cl) => { +                        go!(self: create_tag EndTag cl); +                        go!(self: push_temp c); +                        go!(self: to RawEndTagName kind); +                    } +                    None => { +                        go!(self: emit '<'); +                        go!(self: emit '/'); +                        
self.reconsume = true; +                        go!(self: to RawData kind); +                    }                  }              }, @@ -833,17 +968,31 @@ impl<Sink: TokenSink> Tokenizer<Sink> {                  let c = get_char!(self, input);                  if self.have_appropriate_end_tag() {                      match c { -                        '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName), -                        '/' => go!(self: to SelfClosingStartTag), -                        '>' => go!(self: emit_tag Data), +                        '\t' | '\n' | '\x0C' | ' ' => { +                            go!(self: to BeforeAttributeName); +                        } +                        '/' => { +                            go!(self: to SelfClosingStartTag); +                        } +                        '>' => { +                            go!(self: emit_tag Data); +                        }                          _ => (),                      }                  }                  match lower_ascii_letter(c) { -                    Some(cl) => go!(self: push_tag cl; push_temp c), +                    Some(cl) => { +                        go!(self: push_tag cl); +                        go!(self: push_temp c); +                    }                      None => { -                        go!(self: discard_tag; emit '<'; emit '/'; emit_temp; reconsume RawData kind) +                        go!(self: discard_tag); +                        go!(self: emit '<'); +                        go!(self: emit '/'); +                        go!(self: emit_temp); +                        self.reconsume = true; +                        go!(self: to RawData kind);                      }                  }              }, @@ -858,11 +1007,20 @@ impl<Sink: TokenSink> Tokenizer<Sink> {                          } else {                              Escaped                          }; -                        go!(self: emit c; to RawData ScriptDataEscaped 
esc); +                        { +                            go!(self: emit c); +                            go!(self: to RawData ScriptDataEscaped esc); +                        };                      }                      _ => match lower_ascii_letter(c) { -                        Some(cl) => go!(self: push_temp cl; emit c), -                        None => go!(self: reconsume RawData ScriptDataEscaped Escaped), +                        Some(cl) => { +                            go!(self: push_temp cl); +                            go!(self: emit c); +                        } +                        None => { +                            self.reconsume = true; +                            go!(self: to RawData ScriptDataEscaped Escaped); +                        }                      },                  }              }, @@ -870,47 +1028,89 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ script-data-escape-start-state              states::ScriptDataEscapeStart(Escaped) => loop {                  match get_char!(self, input) { -                    '-' => go!(self: emit '-'; to ScriptDataEscapeStartDash), -                    _ => go!(self: reconsume RawData ScriptData), +                    '-' => { +                        go!(self: emit '-'); +                        go!(self: to ScriptDataEscapeStartDash); +                    } +                    _ => { +                        self.reconsume = true; +                        go!(self: to RawData ScriptData); +                    }                  }              },              //§ script-data-escape-start-dash-state              states::ScriptDataEscapeStartDash => loop {                  match get_char!(self, input) { -                    '-' => go!(self: emit '-'; to ScriptDataEscapedDashDash Escaped), -                    _ => go!(self: reconsume RawData ScriptData), +                    '-' => { +                        go!(self: emit '-'); +                        go!(self: to 
ScriptDataEscapedDashDash Escaped); +                    } +                    _ => { +                        self.reconsume = true; +                        go!(self: to RawData ScriptData); +                    }                  }              },              //§ script-data-escaped-dash-state script-data-double-escaped-dash-state              states::ScriptDataEscapedDash(kind) => loop {                  match get_char!(self, input) { -                    '-' => go!(self: emit '-'; to ScriptDataEscapedDashDash kind), +                    '-' => { +                        go!(self: emit '-'); +                        go!(self: to ScriptDataEscapedDashDash kind); +                    }                      '<' => {                          if kind == DoubleEscaped { -                            go!(self: emit '<'); +                            { +                                go!(self: emit '<'); +                            };                          } -                        go!(self: to RawLessThanSign ScriptDataEscaped kind); +                        { +                            go!(self: to RawLessThanSign ScriptDataEscaped kind); +                        }; +                    } +                    '\0' => { +                        go!(self: error); +                        go!(self: emit '\u{fffd}'); +                        go!(self: to RawData ScriptDataEscaped kind); +                    } +                    c => { +                        go!(self: emit c); +                        go!(self: to RawData ScriptDataEscaped kind);                      } -                    '\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind), -                    c => go!(self: emit c; to RawData ScriptDataEscaped kind),                  }              },              //§ script-data-escaped-dash-dash-state script-data-double-escaped-dash-dash-state              states::ScriptDataEscapedDashDash(kind) => loop {                  match 
get_char!(self, input) { -                    '-' => go!(self: emit '-'), +                    '-' => { +                        go!(self: emit '-'); +                    }                      '<' => {                          if kind == DoubleEscaped { -                            go!(self: emit '<'); +                            { +                                go!(self: emit '<'); +                            };                          } -                        go!(self: to RawLessThanSign ScriptDataEscaped kind); +                        { +                            go!(self: to RawLessThanSign ScriptDataEscaped kind); +                        }; +                    } +                    '>' => { +                        go!(self: emit '>'); +                        go!(self: to RawData ScriptData); +                    } +                    '\0' => { +                        go!(self: error); +                        go!(self: emit '\u{fffd}'); +                        go!(self: to RawData ScriptDataEscaped kind); +                    } +                    c => { +                        go!(self: emit c); +                        go!(self: to RawData ScriptDataEscaped kind);                      } -                    '>' => go!(self: emit '>'; to RawData ScriptData), -                    '\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind), -                    c => go!(self: emit c; to RawData ScriptDataEscaped kind),                  }              }, @@ -924,11 +1124,20 @@ impl<Sink: TokenSink> Tokenizer<Sink> {                          } else {                              DoubleEscaped                          }; -                        go!(self: emit c; to RawData ScriptDataEscaped esc); +                        { +                            go!(self: emit c); +                            go!(self: to RawData ScriptDataEscaped esc); +                        };                      }                      _ => match 
lower_ascii_letter(c) { -                        Some(cl) => go!(self: push_temp cl; emit c), -                        None => go!(self: reconsume RawData ScriptDataEscaped DoubleEscaped), +                        Some(cl) => { +                            go!(self: push_temp cl); +                            go!(self: emit c); +                        } +                        None => { +                            self.reconsume = true; +                            go!(self: to RawData ScriptDataEscaped DoubleEscaped); +                        }                      },                  }              }, @@ -937,15 +1146,29 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              states::BeforeAttributeName => loop {                  match get_char!(self, input) {                      '\t' | '\n' | '\x0C' | ' ' => (), -                    '/' => go!(self: to SelfClosingStartTag), -                    '>' => go!(self: emit_tag Data), -                    '\0' => go!(self: error; create_attr '\u{fffd}'; to AttributeName), +                    '/' => { +                        go!(self: to SelfClosingStartTag); +                    } +                    '>' => { +                        go!(self: emit_tag Data); +                    } +                    '\0' => { +                        go!(self: error); +                        go!(self: create_attr '\u{fffd}'); +                        go!(self: to AttributeName); +                    }                      c => match lower_ascii_letter(c) { -                        Some(cl) => go!(self: create_attr cl; to AttributeName), +                        Some(cl) => { +                            go!(self: create_attr cl); +                            go!(self: to AttributeName); +                        }                          None => {                              go_match!(self: c,                              '"' , '\'' , '<' , '=' => error); -                            go!(self: create_attr c; to 
AttributeName); +                            { +                                go!(self: create_attr c); +                                go!(self: to AttributeName); +                            };                          }                      },                  } @@ -954,17 +1177,32 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ attribute-name-state              states::AttributeName => loop {                  match get_char!(self, input) { -                    '\t' | '\n' | '\x0C' | ' ' => go!(self: to AfterAttributeName), -                    '/' => go!(self: to SelfClosingStartTag), -                    '=' => go!(self: to BeforeAttributeValue), -                    '>' => go!(self: emit_tag Data), -                    '\0' => go!(self: error; push_name '\u{fffd}'), +                    '\t' | '\n' | '\x0C' | ' ' => { +                        go!(self: to AfterAttributeName); +                    } +                    '/' => { +                        go!(self: to SelfClosingStartTag); +                    } +                    '=' => { +                        go!(self: to BeforeAttributeValue); +                    } +                    '>' => { +                        go!(self: emit_tag Data); +                    } +                    '\0' => { +                        go!(self: error); +                        go!(self: push_name '\u{fffd}'); +                    }                      c => match lower_ascii_letter(c) { -                        Some(cl) => go!(self: push_name cl), +                        Some(cl) => { +                            go!(self: push_name cl); +                        }                          None => {                              go_match!(self: c,                              '"' , '\'' , '<' => error); -                            go!(self: push_name c); +                            { +                                go!(self: push_name c); +                            };                          }   
                   },                  } @@ -974,16 +1212,32 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              states::AfterAttributeName => loop {                  match get_char!(self, input) {                      '\t' | '\n' | '\x0C' | ' ' => (), -                    '/' => go!(self: to SelfClosingStartTag), -                    '=' => go!(self: to BeforeAttributeValue), -                    '>' => go!(self: emit_tag Data), -                    '\0' => go!(self: error; create_attr '\u{fffd}'; to AttributeName), +                    '/' => { +                        go!(self: to SelfClosingStartTag); +                    } +                    '=' => { +                        go!(self: to BeforeAttributeValue); +                    } +                    '>' => { +                        go!(self: emit_tag Data); +                    } +                    '\0' => { +                        go!(self: error); +                        go!(self: create_attr '\u{fffd}'); +                        go!(self: to AttributeName); +                    }                      c => match lower_ascii_letter(c) { -                        Some(cl) => go!(self: create_attr cl; to AttributeName), +                        Some(cl) => { +                            go!(self: create_attr cl); +                            go!(self: to AttributeName); +                        }                          None => {                              go_match!(self: c,                              '"' , '\'' , '<' => error); -                            go!(self: create_attr c; to AttributeName); +                            { +                                go!(self: create_attr c); +                                go!(self: to AttributeName); +                            };                          }                      },                  } @@ -994,36 +1248,75 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              // hopefully in the same zero-copy buffer.              
states::BeforeAttributeValue => loop {                  match peek!(self, input) { -                    '\t' | '\n' | '\r' | '\x0C' | ' ' => go!(self: discard_char input), -                    '"' => go!(self: discard_char input; to AttributeValue DoubleQuoted), -                    '\'' => go!(self: discard_char input; to AttributeValue SingleQuoted), +                    '\t' | '\n' | '\r' | '\x0C' | ' ' => { +                        go!(self: discard_char input); +                    } +                    '"' => { +                        go!(self: discard_char input); +                        go!(self: to AttributeValue DoubleQuoted); +                    } +                    '\'' => { +                        go!(self: discard_char input); +                        go!(self: to AttributeValue SingleQuoted); +                    }                      '\0' => { -                        go!(self: discard_char input; error; push_value '\u{fffd}'; to AttributeValue Unquoted) +                        go!(self: discard_char input); +                        go!(self: error); +                        go!(self: push_value '\u{fffd}'); +                        go!(self: to AttributeValue Unquoted); +                    } +                    '>' => { +                        go!(self: discard_char input); +                        go!(self: error); +                        go!(self: emit_tag Data); +                    } +                    _ => { +                        go!(self: to AttributeValue Unquoted);                      } -                    '>' => go!(self: discard_char input; error; emit_tag Data), -                    _ => go!(self: to AttributeValue Unquoted),                  }              },              //§ attribute-value-(double-quoted)-state              states::AttributeValue(DoubleQuoted) => loop {                  match pop_except_from!(self, input, small_char_set!('\r' '"' '&' '\0' '\n')) { -                    FromSet('"') => go!(self: to 
AfterAttributeValueQuoted), -                    FromSet('&') => go!(self: consume_char_ref '"'), -                    FromSet('\0') => go!(self: error; push_value '\u{fffd}'), -                    FromSet(c) => go!(self: push_value c), -                    NotFromSet(ref b) => go!(self: append_value b), +                    FromSet('"') => { +                        go!(self: to AfterAttributeValueQuoted); +                    } +                    FromSet('&') => { +                        go!(self: consume_char_ref '"'); +                    } +                    FromSet('\0') => { +                        go!(self: error); +                        go!(self: push_value '\u{fffd}'); +                    } +                    FromSet(c) => { +                        go!(self: push_value c); +                    } +                    NotFromSet(ref b) => { +                        go!(self: append_value b); +                    }                  }              },              //§ attribute-value-(single-quoted)-state              states::AttributeValue(SingleQuoted) => loop {                  match pop_except_from!(self, input, small_char_set!('\r' '\'' '&' '\0' '\n')) { -                    FromSet('\'') => go!(self: to AfterAttributeValueQuoted), -                    FromSet('&') => go!(self: consume_char_ref '\''), -                    FromSet('\0') => go!(self: error; push_value '\u{fffd}'), -                    FromSet(c) => go!(self: push_value c), -                    NotFromSet(ref b) => go!(self: append_value b), +                    FromSet('\'') => { +                        go!(self: to AfterAttributeValueQuoted); +                    } +                    FromSet('&') => { +                        go!(self: consume_char_ref '\''); +                    } +                    FromSet('\0') => { +                        go!(self: error); +                        go!(self: push_value '\u{fffd}'); +                    } +                    FromSet(c) 
=> { +                        go!(self: push_value c); +                    } +                    NotFromSet(ref b) => { +                        go!(self: append_value b); +                    }                  }              }, @@ -1035,27 +1328,48 @@ impl<Sink: TokenSink> Tokenizer<Sink> {                      small_char_set!('\r' '\t' '\n' '\x0C' ' ' '&' '>' '\0')                  ) {                      FromSet('\t') | FromSet('\n') | FromSet('\x0C') | FromSet(' ') => { -                        go!(self: to BeforeAttributeName) +                        go!(self: to BeforeAttributeName); +                    } +                    FromSet('&') => { +                        go!(self: consume_char_ref '>'); +                    } +                    FromSet('>') => { +                        go!(self: emit_tag Data); +                    } +                    FromSet('\0') => { +                        go!(self: error); +                        go!(self: push_value '\u{fffd}');                      } -                    FromSet('&') => go!(self: consume_char_ref '>'), -                    FromSet('>') => go!(self: emit_tag Data), -                    FromSet('\0') => go!(self: error; push_value '\u{fffd}'),                      FromSet(c) => {                          go_match!(self: c,                              '"' , '\'' , '<' , '=' , '`' => error); -                        go!(self: push_value c); +                        { +                            go!(self: push_value c); +                        }; +                    } +                    NotFromSet(ref b) => { +                        go!(self: append_value b);                      } -                    NotFromSet(ref b) => go!(self: append_value b),                  }              },              //§ after-attribute-value-(quoted)-state              states::AfterAttributeValueQuoted => loop {                  match get_char!(self, input) { -                    '\t' | '\n' | '\x0C' | ' ' => 
go!(self: to BeforeAttributeName), -                    '/' => go!(self: to SelfClosingStartTag), -                    '>' => go!(self: emit_tag Data), -                    _ => go!(self: error; reconsume BeforeAttributeName), +                    '\t' | '\n' | '\x0C' | ' ' => { +                        go!(self: to BeforeAttributeName); +                    } +                    '/' => { +                        go!(self: to SelfClosingStartTag); +                    } +                    '>' => { +                        go!(self: emit_tag Data); +                    } +                    _ => { +                        go!(self: error); +                        self.reconsume = true; +                        go!(self: to BeforeAttributeName); +                    }                  }              }, @@ -1064,76 +1378,164 @@ impl<Sink: TokenSink> Tokenizer<Sink> {                  match get_char!(self, input) {                      '>' => {                          self.current_tag_self_closing = true; -                        go!(self: emit_tag Data); +                        { +                            go!(self: emit_tag Data); +                        }; +                    } +                    _ => { +                        go!(self: error); +                        self.reconsume = true; +                        go!(self: to BeforeAttributeName);                      } -                    _ => go!(self: error; reconsume BeforeAttributeName),                  }              },              //§ comment-start-state              states::CommentStart => loop {                  match get_char!(self, input) { -                    '-' => go!(self: to CommentStartDash), -                    '\0' => go!(self: error; push_comment '\u{fffd}'; to Comment), -                    '>' => go!(self: error; emit_comment; to Data), -                    c => go!(self: push_comment c; to Comment), +                    '-' => { +                        go!(self: to 
CommentStartDash); +                    } +                    '\0' => { +                        go!(self: error); +                        go!(self: push_comment '\u{fffd}'); +                        go!(self: to Comment); +                    } +                    '>' => { +                        go!(self: error); +                        go!(self: emit_comment); +                        go!(self: to Data); +                    } +                    c => { +                        go!(self: push_comment c); +                        go!(self: to Comment); +                    }                  }              },              //§ comment-start-dash-state              states::CommentStartDash => loop {                  match get_char!(self, input) { -                    '-' => go!(self: to CommentEnd), -                    '\0' => go!(self: error; append_comment "-\u{fffd}"; to Comment), -                    '>' => go!(self: error; emit_comment; to Data), -                    c => go!(self: push_comment '-'; push_comment c; to Comment), +                    '-' => { +                        go!(self: to CommentEnd); +                    } +                    '\0' => { +                        go!(self: error); +                        go!(self: append_comment "-\u{fffd}"); +                        go!(self: to Comment); +                    } +                    '>' => { +                        go!(self: error); +                        go!(self: emit_comment); +                        go!(self: to Data); +                    } +                    c => { +                        go!(self: push_comment '-'); +                        go!(self: push_comment c); +                        go!(self: to Comment); +                    }                  }              },              //§ comment-state              states::Comment => loop {                  match get_char!(self, input) { -                    '-' => go!(self: to CommentEndDash), -                    
'\0' => go!(self: error; push_comment '\u{fffd}'), -                    c => go!(self: push_comment c), +                    '-' => { +                        go!(self: to CommentEndDash); +                    } +                    '\0' => { +                        go!(self: error); +                        go!(self: push_comment '\u{fffd}'); +                    } +                    c => { +                        go!(self: push_comment c); +                    }                  }              },              //§ comment-end-dash-state              states::CommentEndDash => loop {                  match get_char!(self, input) { -                    '-' => go!(self: to CommentEnd), -                    '\0' => go!(self: error; append_comment "-\u{fffd}"; to Comment), -                    c => go!(self: push_comment '-'; push_comment c; to Comment), +                    '-' => { +                        go!(self: to CommentEnd); +                    } +                    '\0' => { +                        go!(self: error); +                        go!(self: append_comment "-\u{fffd}"); +                        go!(self: to Comment); +                    } +                    c => { +                        go!(self: push_comment '-'); +                        go!(self: push_comment c); +                        go!(self: to Comment); +                    }                  }              },              //§ comment-end-state              states::CommentEnd => loop {                  match get_char!(self, input) { -                    '>' => go!(self: emit_comment; to Data), -                    '\0' => go!(self: error; append_comment "--\u{fffd}"; to Comment), -                    '!' 
=> go!(self: error; to CommentEndBang), -                    '-' => go!(self: error; push_comment '-'), -                    c => go!(self: error; append_comment "--"; push_comment c; to Comment), +                    '>' => { +                        go!(self: emit_comment); +                        go!(self: to Data); +                    } +                    '\0' => { +                        go!(self: error); +                        go!(self: append_comment "--\u{fffd}"); +                        go!(self: to Comment); +                    } +                    '!' => { +                        go!(self: error); +                        go!(self: to CommentEndBang); +                    } +                    '-' => { +                        go!(self: error); +                        go!(self: push_comment '-'); +                    } +                    c => { +                        go!(self: error); +                        go!(self: append_comment "--"); +                        go!(self: push_comment c); +                        go!(self: to Comment); +                    }                  }              },              //§ comment-end-bang-state              states::CommentEndBang => loop {                  match get_char!(self, input) { -                    '-' => go!(self: append_comment "--!"; to CommentEndDash), -                    '>' => go!(self: emit_comment; to Data), -                    '\0' => go!(self: error; append_comment "--!\u{fffd}"; to Comment), -                    c => go!(self: append_comment "--!"; push_comment c; to Comment), +                    '-' => { +                        go!(self: append_comment "--!"); +                        go!(self: to CommentEndDash); +                    } +                    '>' => { +                        go!(self: emit_comment); +                        go!(self: to Data); +                    } +                    '\0' => { +                        go!(self: error); +                 
       go!(self: append_comment "--!\u{fffd}"); +                        go!(self: to Comment); +                    } +                    c => { +                        go!(self: append_comment "--!"); +                        go!(self: push_comment c); +                        go!(self: to Comment); +                    }                  }              },              //§ doctype-state              states::Doctype => loop {                  match get_char!(self, input) { -                    '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeName), -                    _ => go!(self: error; reconsume BeforeDoctypeName), +                    '\t' | '\n' | '\x0C' | ' ' => { +                        go!(self: to BeforeDoctypeName); +                    } +                    _ => { +                        go!(self: error); +                        self.reconsume = true; +                        go!(self: to BeforeDoctypeName); +                    }                  }              }, @@ -1142,11 +1544,22 @@ impl<Sink: TokenSink> Tokenizer<Sink> {                  match get_char!(self, input) {                      '\t' | '\n' | '\x0C' | ' ' => (),                      '\0' => { -                        go!(self: error; create_doctype; push_doctype_name '\u{fffd}'; to DoctypeName) +                        go!(self: error); +                        go!(self: create_doctype); +                        go!(self: push_doctype_name '\u{fffd}'); +                        go!(self: to DoctypeName); +                    } +                    '>' => { +                        go!(self: error); +                        go!(self: create_doctype); +                        go!(self: force_quirks); +                        go!(self: emit_doctype); +                        go!(self: to Data);                      } -                    '>' => go!(self: error; create_doctype; force_quirks; emit_doctype; to Data),                      c => { -                        go!(self: 
create_doctype; push_doctype_name (c.to_ascii_lowercase()); to DoctypeName) +                        go!(self: create_doctype); +                        go!(self: push_doctype_name (c.to_ascii_lowercase())); +                        go!(self: to DoctypeName);                      }                  }              }, @@ -1154,24 +1567,46 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ doctype-name-state              states::DoctypeName => loop {                  match get_char!(self, input) { -                    '\t' | '\n' | '\x0C' | ' ' => go!(self: clear_temp; to AfterDoctypeName), -                    '>' => go!(self: emit_doctype; to Data), -                    '\0' => go!(self: error; push_doctype_name '\u{fffd}'), -                    c => go!(self: push_doctype_name (c.to_ascii_lowercase())), +                    '\t' | '\n' | '\x0C' | ' ' => { +                        go!(self: clear_temp); +                        go!(self: to AfterDoctypeName); +                    } +                    '>' => { +                        go!(self: emit_doctype); +                        go!(self: to Data); +                    } +                    '\0' => { +                        go!(self: error); +                        go!(self: push_doctype_name '\u{fffd}'); +                    } +                    c => { +                        go!(self: push_doctype_name (c.to_ascii_lowercase())); +                    }                  }              },              //§ after-doctype-name-state              states::AfterDoctypeName => loop {                  if eat!(self, input, "public") { -                    go!(self: to AfterDoctypeKeyword Public); +                    { +                        go!(self: to AfterDoctypeKeyword Public); +                    };                  } else if eat!(self, input, "system") { -                    go!(self: to AfterDoctypeKeyword System); +                    { +                        go!(self: to AfterDoctypeKeyword 
System); +                    };                  } else {                      match get_char!(self, input) {                          '\t' | '\n' | '\x0C' | ' ' => (), -                        '>' => go!(self: emit_doctype; to Data), -                        _ => go!(self: error; force_quirks; to BogusDoctype), +                        '>' => { +                            go!(self: emit_doctype); +                            go!(self: to Data); +                        } +                        _ => { +                            go!(self: error); +                            go!(self: force_quirks); +                            go!(self: to BogusDoctype); +                        }                      }                  }              }, @@ -1179,15 +1614,30 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ after-doctype-public-keyword-state after-doctype-system-keyword-state              states::AfterDoctypeKeyword(kind) => loop {                  match get_char!(self, input) { -                    '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeIdentifier kind), +                    '\t' | '\n' | '\x0C' | ' ' => { +                        go!(self: to BeforeDoctypeIdentifier kind); +                    }                      '"' => { -                        go!(self: error; clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind) +                        go!(self: error); +                        go!(self: clear_doctype_id kind); +                        go!(self: to DoctypeIdentifierDoubleQuoted kind);                      }                      '\'' => { -                        go!(self: error; clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind) +                        go!(self: error); +                        go!(self: clear_doctype_id kind); +                        go!(self: to DoctypeIdentifierSingleQuoted kind); +                    } +                    '>' => { +                        go!(self: error); +       
                 go!(self: force_quirks); +                        go!(self: emit_doctype); +                        go!(self: to Data); +                    } +                    _ => { +                        go!(self: error); +                        go!(self: force_quirks); +                        go!(self: to BogusDoctype);                      } -                    '>' => go!(self: error; force_quirks; emit_doctype; to Data), -                    _ => go!(self: error; force_quirks; to BogusDoctype),                  }              }, @@ -1195,30 +1645,69 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              states::BeforeDoctypeIdentifier(kind) => loop {                  match get_char!(self, input) {                      '\t' | '\n' | '\x0C' | ' ' => (), -                    '"' => go!(self: clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind), -                    '\'' => go!(self: clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind), -                    '>' => go!(self: error; force_quirks; emit_doctype; to Data), -                    _ => go!(self: error; force_quirks; to BogusDoctype), +                    '"' => { +                        go!(self: clear_doctype_id kind); +                        go!(self: to DoctypeIdentifierDoubleQuoted kind); +                    } +                    '\'' => { +                        go!(self: clear_doctype_id kind); +                        go!(self: to DoctypeIdentifierSingleQuoted kind); +                    } +                    '>' => { +                        go!(self: error); +                        go!(self: force_quirks); +                        go!(self: emit_doctype); +                        go!(self: to Data); +                    } +                    _ => { +                        go!(self: error); +                        go!(self: force_quirks); +                        go!(self: to BogusDoctype); +                    }                  }              },              
//§ doctype-public-identifier-(double-quoted)-state doctype-system-identifier-(double-quoted)-state              states::DoctypeIdentifierDoubleQuoted(kind) => loop {                  match get_char!(self, input) { -                    '"' => go!(self: to AfterDoctypeIdentifier kind), -                    '\0' => go!(self: error; push_doctype_id kind '\u{fffd}'), -                    '>' => go!(self: error; force_quirks; emit_doctype; to Data), -                    c => go!(self: push_doctype_id kind c), +                    '"' => { +                        go!(self: to AfterDoctypeIdentifier kind); +                    } +                    '\0' => { +                        go!(self: error); +                        go!(self: push_doctype_id kind '\u{fffd}'); +                    } +                    '>' => { +                        go!(self: error); +                        go!(self: force_quirks); +                        go!(self: emit_doctype); +                        go!(self: to Data); +                    } +                    c => { +                        go!(self: push_doctype_id kind c); +                    }                  }              },              //§ doctype-public-identifier-(single-quoted)-state doctype-system-identifier-(single-quoted)-state              states::DoctypeIdentifierSingleQuoted(kind) => loop {                  match get_char!(self, input) { -                    '\'' => go!(self: to AfterDoctypeIdentifier kind), -                    '\0' => go!(self: error; push_doctype_id kind '\u{fffd}'), -                    '>' => go!(self: error; force_quirks; emit_doctype; to Data), -                    c => go!(self: push_doctype_id kind c), +                    '\'' => { +                        go!(self: to AfterDoctypeIdentifier kind); +                    } +                    '\0' => { +                        go!(self: error); +                        go!(self: push_doctype_id kind '\u{fffd}'); +                    } +   
                 '>' => { +                        go!(self: error); +                        go!(self: force_quirks); +                        go!(self: emit_doctype); +                        go!(self: to Data); +                    } +                    c => { +                        go!(self: push_doctype_id kind c); +                    }                  }              }, @@ -1226,16 +1715,27 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              states::AfterDoctypeIdentifier(Public) => loop {                  match get_char!(self, input) {                      '\t' | '\n' | '\x0C' | ' ' => { -                        go!(self: to BetweenDoctypePublicAndSystemIdentifiers) +                        go!(self: to BetweenDoctypePublicAndSystemIdentifiers); +                    } +                    '>' => { +                        go!(self: emit_doctype); +                        go!(self: to Data);                      } -                    '>' => go!(self: emit_doctype; to Data),                      '"' => { -                        go!(self: error; clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System) +                        go!(self: error); +                        go!(self: clear_doctype_id System); +                        go!(self: to DoctypeIdentifierDoubleQuoted System);                      }                      '\'' => { -                        go!(self: error; clear_doctype_id System; to DoctypeIdentifierSingleQuoted System) +                        go!(self: error); +                        go!(self: clear_doctype_id System); +                        go!(self: to DoctypeIdentifierSingleQuoted System); +                    } +                    _ => { +                        go!(self: error); +                        go!(self: force_quirks); +                        go!(self: to BogusDoctype);                      } -                    _ => go!(self: error; force_quirks; to BogusDoctype),                  }              }, @@ 
-1243,8 +1743,14 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              states::AfterDoctypeIdentifier(System) => loop {                  match get_char!(self, input) {                      '\t' | '\n' | '\x0C' | ' ' => (), -                    '>' => go!(self: emit_doctype; to Data), -                    _ => go!(self: error; to BogusDoctype), +                    '>' => { +                        go!(self: emit_doctype); +                        go!(self: to Data); +                    } +                    _ => { +                        go!(self: error); +                        go!(self: to BogusDoctype); +                    }                  }              }, @@ -1252,21 +1758,33 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              states::BetweenDoctypePublicAndSystemIdentifiers => loop {                  match get_char!(self, input) {                      '\t' | '\n' | '\x0C' | ' ' => (), -                    '>' => go!(self: emit_doctype; to Data), +                    '>' => { +                        go!(self: emit_doctype); +                        go!(self: to Data); +                    }                      '"' => { -                        go!(self: clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System) +                        go!(self: clear_doctype_id System); +                        go!(self: to DoctypeIdentifierDoubleQuoted System);                      }                      '\'' => { -                        go!(self: clear_doctype_id System; to DoctypeIdentifierSingleQuoted System) +                        go!(self: clear_doctype_id System); +                        go!(self: to DoctypeIdentifierSingleQuoted System); +                    } +                    _ => { +                        go!(self: error); +                        go!(self: force_quirks); +                        go!(self: to BogusDoctype);                      } -                    _ => go!(self: error; force_quirks; to BogusDoctype),                 
 }              },              //§ bogus-doctype-state              states::BogusDoctype => loop {                  match get_char!(self, input) { -                    '>' => go!(self: emit_doctype; to Data), +                    '>' => { +                        go!(self: emit_doctype); +                        go!(self: to Data); +                    }                      _ => (),                  }              }, @@ -1274,52 +1792,93 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              //§ bogus-comment-state              states::BogusComment => loop {                  match get_char!(self, input) { -                    '>' => go!(self: emit_comment; to Data), -                    '\0' => go!(self: push_comment '\u{fffd}'), -                    c => go!(self: push_comment c), +                    '>' => { +                        go!(self: emit_comment); +                        go!(self: to Data); +                    } +                    '\0' => { +                        go!(self: push_comment '\u{fffd}'); +                    } +                    c => { +                        go!(self: push_comment c); +                    }                  }              },              //§ markup-declaration-open-state              states::MarkupDeclarationOpen => loop {                  if eat_exact!(self, input, "--") { -                    go!(self: clear_comment; to CommentStart); +                    { +                        go!(self: clear_comment); +                        go!(self: to CommentStart); +                    };                  } else if eat!(self, input, "doctype") { -                    go!(self: to Doctype); +                    { +                        go!(self: to Doctype); +                    };                  } else {                      if self                          .sink                          .adjusted_current_node_present_but_not_in_html_namespace()                      {                          if eat_exact!(self, 
input, "[CDATA[") { -                            go!(self: clear_temp; to CdataSection); +                            { +                                go!(self: clear_temp); +                                go!(self: to CdataSection); +                            };                          }                      } -                    go!(self: error; to BogusComment); +                    { +                        go!(self: error); +                        go!(self: to BogusComment); +                    };                  }              },              //§ cdata-section-state              states::CdataSection => loop {                  match get_char!(self, input) { -                    ']' => go!(self: to CdataSectionBracket), -                    '\0' => go!(self: emit_temp; emit '\0'), -                    c => go!(self: push_temp c), +                    ']' => { +                        go!(self: to CdataSectionBracket); +                    } +                    '\0' => { +                        go!(self: emit_temp); +                        go!(self: emit '\0'); +                    } +                    c => { +                        go!(self: push_temp c); +                    }                  }              },              //§ cdata-section-bracket              states::CdataSectionBracket => match get_char!(self, input) { -                ']' => go!(self: to CdataSectionEnd), -                _ => go!(self: push_temp ']'; reconsume CdataSection), +                ']' => { +                    go!(self: to CdataSectionEnd); +                } +                _ => { +                    go!(self: push_temp ']'); +                    self.reconsume = true; +                    go!(self: to CdataSection); +                }              },              //§ cdata-section-end              states::CdataSectionEnd => loop {                  match get_char!(self, input) { -                    ']' => go!(self: push_temp ']'), -                    '>' 
=> go!(self: emit_temp; to Data), -                    _ => go!(self: push_temp ']'; push_temp ']'; reconsume CdataSection), +                    ']' => { +                        go!(self: push_temp ']'); +                    } +                    '>' => { +                        go!(self: emit_temp); +                        go!(self: to Data); +                    } +                    _ => { +                        go!(self: push_temp ']'); +                        go!(self: push_temp ']'); +                        self.reconsume = true; +                        go!(self: to CdataSection); +                    }                  }              },              //§ END @@ -1360,9 +1919,13 @@ impl<Sink: TokenSink> Tokenizer<Sink> {          for i in 0..num_chars {              let c = chars[i as usize];              match self.state { -                states::Data | states::RawData(states::Rcdata) => go!(self: emit c), +                states::Data | states::RawData(states::Rcdata) => { +                    go!(self: emit c); +                } -                states::AttributeValue(_) => go!(self: push_value c), +                states::AttributeValue(_) => { +                    go!(self: push_value c); +                }                  _ => panic!(                      "state {:?} should not be reachable in process_char_ref", @@ -1446,30 +2009,56 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              | states::AfterAttributeValueQuoted              | states::SelfClosingStartTag              | states::ScriptDataEscapedDash(_) -            | states::ScriptDataEscapedDashDash(_) => go!(self: error_eof; to Data), +            | states::ScriptDataEscapedDashDash(_) => { +                go!(self: error_eof); +                go!(self: to Data); +            } -            states::TagOpen => go!(self: error_eof; emit '<'; to Data), +            states::TagOpen => { +                go!(self: error_eof); +                go!(self: emit '<'); +                
go!(self: to Data); +            } -            states::EndTagOpen => go!(self: error_eof; emit '<'; emit '/'; to Data), +            states::EndTagOpen => { +                go!(self: error_eof); +                go!(self: emit '<'); +                go!(self: emit '/'); +                go!(self: to Data); +            }              states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => { -                go!(self: to RawData ScriptDataEscaped DoubleEscaped) +                go!(self: to RawData ScriptDataEscaped DoubleEscaped);              } -            states::RawLessThanSign(kind) => go!(self: emit '<'; to RawData kind), +            states::RawLessThanSign(kind) => { +                go!(self: emit '<'); +                go!(self: to RawData kind); +            } -            states::RawEndTagOpen(kind) => go!(self: emit '<'; emit '/'; to RawData kind), +            states::RawEndTagOpen(kind) => { +                go!(self: emit '<'); +                go!(self: emit '/'); +                go!(self: to RawData kind); +            }              states::RawEndTagName(kind) => { -                go!(self: emit '<'; emit '/'; emit_temp; to RawData kind) +                go!(self: emit '<'); +                go!(self: emit '/'); +                go!(self: emit_temp); +                go!(self: to RawData kind);              } -            states::ScriptDataEscapeStart(kind) => go!(self: to RawData ScriptDataEscaped kind), +            states::ScriptDataEscapeStart(kind) => { +                go!(self: to RawData ScriptDataEscaped kind); +            } -            states::ScriptDataEscapeStartDash => go!(self: to RawData ScriptData), +            states::ScriptDataEscapeStartDash => { +                go!(self: to RawData ScriptData); +            }              states::ScriptDataDoubleEscapeEnd => { -                go!(self: to RawData ScriptDataEscaped DoubleEscaped) +                go!(self: to RawData ScriptDataEscaped DoubleEscaped);              
}              states::CommentStart @@ -1477,10 +2066,18 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              | states::Comment              | states::CommentEndDash              | states::CommentEnd -            | states::CommentEndBang => go!(self: error_eof; emit_comment; to Data), +            | states::CommentEndBang => { +                go!(self: error_eof); +                go!(self: emit_comment); +                go!(self: to Data); +            }              states::Doctype | states::BeforeDoctypeName => { -                go!(self: error_eof; create_doctype; force_quirks; emit_doctype; to Data) +                go!(self: error_eof); +                go!(self: create_doctype); +                go!(self: force_quirks); +                go!(self: emit_doctype); +                go!(self: to Data);              }              states::DoctypeName @@ -1491,20 +2088,43 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              | states::DoctypeIdentifierSingleQuoted(_)              | states::AfterDoctypeIdentifier(_)              | states::BetweenDoctypePublicAndSystemIdentifiers => { -                go!(self: error_eof; force_quirks; emit_doctype; to Data) +                go!(self: error_eof); +                go!(self: force_quirks); +                go!(self: emit_doctype); +                go!(self: to Data);              } -            states::BogusDoctype => go!(self: emit_doctype; to Data), +            states::BogusDoctype => { +                go!(self: emit_doctype); +                go!(self: to Data); +            } -            states::BogusComment => go!(self: emit_comment; to Data), +            states::BogusComment => { +                go!(self: emit_comment); +                go!(self: to Data); +            } -            states::MarkupDeclarationOpen => go!(self: error; to BogusComment), +            states::MarkupDeclarationOpen => { +                go!(self: error); +                go!(self: to BogusComment); +            } -         
   states::CdataSection => go!(self: emit_temp; error_eof; to Data), +            states::CdataSection => { +                go!(self: emit_temp); +                go!(self: error_eof); +                go!(self: to Data); +            } -            states::CdataSectionBracket => go!(self: push_temp ']'; to CdataSection), +            states::CdataSectionBracket => { +                go!(self: push_temp ']'); +                go!(self: to CdataSection); +            } -            states::CdataSectionEnd => go!(self: push_temp ']'; push_temp ']'; to CdataSection), +            states::CdataSectionEnd => { +                go!(self: push_temp ']'); +                go!(self: push_temp ']'); +                go!(self: to CdataSection); +            }          }      }  } | 
