diff options
author | Martin Fischer <martin@push-f.com> | 2021-11-29 13:45:22 +0100 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2021-11-29 15:11:01 +0100 |
commit | d9800cced5f66f989adf42d8b3dfbc1bf0825cee (patch) | |
tree | a201e0e768346280fe1e7fe73e1d07d47a35d93a /src | |
parent | 66d788cd56f116774a43257aca3f1f2fdad8f47e (diff) |
refactor: use ? operator for explicit control flow
Diffstat (limited to 'src')
-rw-r--r-- | src/tokenizer/mod.rs | 141 |
1 file changed, 78 insertions(+), 63 deletions(-)
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index 02f7963..3f637aa 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -246,7 +246,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { if self.ignore_lf { self.ignore_lf = false; if c == '\n' { - c = unwrap_or_return!(input.next(), None); + c = input.next()?; } } @@ -633,23 +633,38 @@ macro_rules! go_match ( ( $me:ident : $x:expr, $($pats:pat),+ => $($cmds:tt)* ) // This is a macro because it can cause early return // from the function where it is used. macro_rules! get_char ( ($me:expr, $input:expr) => ( - unwrap_or_return!($me.get_char($input), ControlFlow::Break(ProcessResult::Suspend)) + match $me.get_char($input) { + Some(char) => ControlFlow::Continue(char), + None => ControlFlow::Break(ProcessResult::Suspend) + } )); macro_rules! peek ( ($me:expr, $input:expr) => ( - unwrap_or_return!($me.peek($input), ControlFlow::Break(ProcessResult::Suspend)) + match $me.peek($input) { + Some(char) => ControlFlow::Continue(char), + None => ControlFlow::Break(ProcessResult::Suspend) + } )); macro_rules! pop_except_from ( ($me:expr, $input:expr, $set:expr) => ( - unwrap_or_return!($me.pop_except_from($input, $set), ControlFlow::Break(ProcessResult::Suspend)) + match $me.pop_except_from($input, $set) { + Some(char) => ControlFlow::Continue(char), + None => ControlFlow::Break(ProcessResult::Suspend) + } )); macro_rules! eat ( ($me:expr, $input:expr, $pat:expr) => ( - unwrap_or_return!($me.eat($input, $pat, u8::eq_ignore_ascii_case), ControlFlow::Break(ProcessResult::Suspend)) + match $me.eat($input, $pat, u8::eq_ignore_ascii_case) { + Some(char) => ControlFlow::Continue(char), + None => ControlFlow::Break(ProcessResult::Suspend) + } )); macro_rules! 
eat_exact ( ($me:expr, $input:expr, $pat:expr) => ( - unwrap_or_return!($me.eat($input, $pat, u8::eq), ControlFlow::Break(ProcessResult::Suspend)) + match $me.eat($input, $pat, u8::eq) { + Some(char) => ControlFlow::Continue(char), + None => ControlFlow::Break(ProcessResult::Suspend) + } )); impl<Sink: TokenSink> Tokenizer<Sink> { @@ -665,7 +680,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { match self.state { //§ data-state states::Data => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n')) { + match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n'))? { FromSet('\0') => { go!(self: error); go!(self: emit '\0'); @@ -685,7 +700,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ rcdata-state states::RawData(Rcdata) => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n')) { + match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n'))? { FromSet('\0') => { go!(self: error); go!(self: emit '\u{fffd}'); @@ -705,7 +720,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ rawtext-state states::RawData(Rawtext) => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n')) { + match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n'))? { FromSet('\0') => { go!(self: error); go!(self: emit '\u{fffd}'); @@ -722,7 +737,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ script-data-state states::RawData(ScriptData) => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n')) { + match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n'))? { FromSet('\0') => { go!(self: error); go!(self: emit '\u{fffd}'); @@ -739,7 +754,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ script-data-escaped-state states::RawData(ScriptDataEscaped(Escaped)) => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n')) { + match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n'))? 
{ FromSet('\0') => { go!(self: error); go!(self: emit '\u{fffd}'); @@ -760,7 +775,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ script-data-double-escaped-state states::RawData(ScriptDataEscaped(DoubleEscaped)) => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n')) { + match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n'))? { FromSet('\0') => { go!(self: error); go!(self: emit '\u{fffd}'); @@ -782,7 +797,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ plaintext-state states::Plaintext => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\0' '\n')) { + match pop_except_from!(self, input, small_char_set!('\r' '\0' '\n'))? { FromSet('\0') => { go!(self: error); go!(self: emit '\u{fffd}'); @@ -796,7 +811,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ tag-open-state states::TagOpen => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '!' => { go!(self: clear_temp); return go!(self: to MarkupDeclarationOpen); @@ -827,7 +842,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ end-tag-open-state states::EndTagOpen => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '>' => { go!(self: error); return go!(self: to Data); @@ -855,7 +870,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ tag-name-state states::TagName => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '\t' | '\n' | '\x0C' | ' ' => { return go!(self: to BeforeAttributeName); } @@ -877,7 +892,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ script-data-escaped-less-than-sign-state states::RawLessThanSign(ScriptDataEscaped(Escaped)) => loop { - match get_char!(self, input) { + match get_char!(self, input)? 
{ '/' => { go!(self: clear_temp); return go!(self: to RawEndTagOpen ScriptDataEscaped Escaped); @@ -901,7 +916,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ script-data-double-escaped-less-than-sign-state states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '/' => { go!(self: clear_temp); go!(self: emit '/'); @@ -917,7 +932,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ rcdata-less-than-sign-state rawtext-less-than-sign-state script-data-less-than-sign-state // otherwise states::RawLessThanSign(kind) => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '/' => { go!(self: clear_temp); return go!(self: to RawEndTagOpen kind); @@ -937,7 +952,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ rcdata-end-tag-open-state rawtext-end-tag-open-state script-data-end-tag-open-state script-data-escaped-end-tag-open-state states::RawEndTagOpen(kind) => loop { - let c = get_char!(self, input); + let c = get_char!(self, input)?; match lower_ascii_letter(c) { Some(cl) => { go!(self: create_tag EndTag cl); @@ -955,7 +970,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ rcdata-end-tag-name-state rawtext-end-tag-name-state script-data-end-tag-name-state script-data-escaped-end-tag-name-state states::RawEndTagName(kind) => loop { - let c = get_char!(self, input); + let c = get_char!(self, input)?; if self.have_appropriate_end_tag() { match c { '\t' | '\n' | '\x0C' | ' ' => { @@ -989,7 +1004,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ script-data-double-escape-start-state states::ScriptDataEscapeStart(DoubleEscaped) => loop { - let c = get_char!(self, input); + let c = get_char!(self, input)?; match c { '\t' | '\n' | '\x0C' | ' ' | '/' | '>' => { let esc = if &*self.temp_buf == "script" { @@ -1017,7 +1032,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ script-data-escape-start-state states::ScriptDataEscapeStart(Escaped) => loop { - match get_char!(self, input) { + match 
get_char!(self, input)? { '-' => { go!(self: emit '-'); return go!(self: to ScriptDataEscapeStartDash); @@ -1031,7 +1046,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ script-data-escape-start-dash-state states::ScriptDataEscapeStartDash => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '-' => { go!(self: emit '-'); return go!(self: to ScriptDataEscapedDashDash Escaped); @@ -1045,7 +1060,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ script-data-escaped-dash-state script-data-double-escaped-dash-state states::ScriptDataEscapedDash(kind) => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '-' => { go!(self: emit '-'); return go!(self: to ScriptDataEscapedDashDash kind); @@ -1074,7 +1089,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ script-data-escaped-dash-dash-state script-data-double-escaped-dash-dash-state states::ScriptDataEscapedDashDash(kind) => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '-' => { go!(self: emit '-'); } @@ -1106,7 +1121,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ script-data-double-escape-end-state states::ScriptDataDoubleEscapeEnd => loop { - let c = get_char!(self, input); + let c = get_char!(self, input)?; match c { '\t' | '\n' | '\x0C' | ' ' | '/' | '>' => { let esc = if &*self.temp_buf == "script" { @@ -1134,7 +1149,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ before-attribute-name-state states::BeforeAttributeName => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '\t' | '\n' | '\x0C' | ' ' => (), '/' => { return go!(self: to SelfClosingStartTag); @@ -1166,7 +1181,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ attribute-name-state states::AttributeName => loop { - match get_char!(self, input) { + match get_char!(self, input)? 
{ '\t' | '\n' | '\x0C' | ' ' => { return go!(self: to AfterAttributeName); } @@ -1200,7 +1215,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ after-attribute-name-state states::AfterAttributeName => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '\t' | '\n' | '\x0C' | ' ' => (), '/' => { return go!(self: to SelfClosingStartTag); @@ -1237,7 +1252,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { // Use peek so we can handle the first attr character along with the rest, // hopefully in the same zero-copy buffer. states::BeforeAttributeValue => loop { - match peek!(self, input) { + match peek!(self, input)? { '\t' | '\n' | '\r' | '\x0C' | ' ' => { go!(self: discard_char input); } @@ -1268,7 +1283,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ attribute-value-(double-quoted)-state states::AttributeValue(DoubleQuoted) => loop { - match pop_except_from!(self, input, small_char_set!('\r' '"' '&' '\0' '\n')) { + match pop_except_from!(self, input, small_char_set!('\r' '"' '&' '\0' '\n'))? { FromSet('"') => { return go!(self: to AfterAttributeValueQuoted); } @@ -1290,7 +1305,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ attribute-value-(single-quoted)-state states::AttributeValue(SingleQuoted) => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\'' '&' '\0' '\n')) { + match pop_except_from!(self, input, small_char_set!('\r' '\'' '&' '\0' '\n'))? { FromSet('\'') => { return go!(self: to AfterAttributeValueQuoted); } @@ -1316,7 +1331,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { self, input, small_char_set!('\r' '\t' '\n' '\x0C' ' ' '&' '>' '\0') - ) { + )? { FromSet('\t') | FromSet('\n') | FromSet('\x0C') | FromSet(' ') => { return go!(self: to BeforeAttributeName); } @@ -1345,7 +1360,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ after-attribute-value-(quoted)-state states::AfterAttributeValueQuoted => loop { - match get_char!(self, input) { + match get_char!(self, input)? 
{ '\t' | '\n' | '\x0C' | ' ' => { return go!(self: to BeforeAttributeName); } @@ -1365,7 +1380,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ self-closing-start-tag-state states::SelfClosingStartTag => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '>' => { self.current_tag_self_closing = true; { @@ -1382,7 +1397,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ comment-start-state states::CommentStart => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '-' => { return go!(self: to CommentStartDash); } @@ -1405,7 +1420,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ comment-start-dash-state states::CommentStartDash => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '-' => { return go!(self: to CommentEnd); } @@ -1429,7 +1444,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ comment-state states::Comment => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '-' => { return go!(self: to CommentEndDash); } @@ -1445,7 +1460,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ comment-end-dash-state states::CommentEndDash => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '-' => { return go!(self: to CommentEnd); } @@ -1464,7 +1479,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ comment-end-state states::CommentEnd => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '>' => { go!(self: emit_comment); return go!(self: to Data); @@ -1493,7 +1508,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ comment-end-bang-state states::CommentEndBang => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '-' => { go!(self: append_comment "--!"); return go!(self: to CommentEndDash); @@ -1517,7 +1532,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ doctype-state states::Doctype => loop { - match get_char!(self, input) { + match get_char!(self, input)? 
{ '\t' | '\n' | '\x0C' | ' ' => { return go!(self: to BeforeDoctypeName); } @@ -1531,7 +1546,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ before-doctype-name-state states::BeforeDoctypeName => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '\t' | '\n' | '\x0C' | ' ' => (), '\0' => { go!(self: error); @@ -1556,7 +1571,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ doctype-name-state states::DoctypeName => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '\t' | '\n' | '\x0C' | ' ' => { go!(self: clear_temp); return go!(self: to AfterDoctypeName); @@ -1577,16 +1592,16 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ after-doctype-name-state states::AfterDoctypeName => loop { - if eat!(self, input, "public") { + if eat!(self, input, "public")? { { return go!(self: to AfterDoctypeKeyword Public); }; - } else if eat!(self, input, "system") { + } else if eat!(self, input, "system")? { { return go!(self: to AfterDoctypeKeyword System); }; } else { - match get_char!(self, input) { + match get_char!(self, input)? { '\t' | '\n' | '\x0C' | ' ' => (), '>' => { go!(self: emit_doctype); @@ -1603,7 +1618,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ after-doctype-public-keyword-state after-doctype-system-keyword-state states::AfterDoctypeKeyword(kind) => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '\t' | '\n' | '\x0C' | ' ' => { return go!(self: to BeforeDoctypeIdentifier kind); } @@ -1633,7 +1648,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ before-doctype-public-identifier-state before-doctype-system-identifier-state states::BeforeDoctypeIdentifier(kind) => loop { - match get_char!(self, input) { + match get_char!(self, input)? 
{ '\t' | '\n' | '\x0C' | ' ' => (), '"' => { go!(self: clear_doctype_id kind); @@ -1659,7 +1674,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ doctype-public-identifier-(double-quoted)-state doctype-system-identifier-(double-quoted)-state states::DoctypeIdentifierDoubleQuoted(kind) => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '"' => { return go!(self: to AfterDoctypeIdentifier kind); } @@ -1681,7 +1696,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ doctype-public-identifier-(single-quoted)-state doctype-system-identifier-(single-quoted)-state states::DoctypeIdentifierSingleQuoted(kind) => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '\'' => { return go!(self: to AfterDoctypeIdentifier kind); } @@ -1703,7 +1718,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ after-doctype-public-identifier-state states::AfterDoctypeIdentifier(Public) => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '\t' | '\n' | '\x0C' | ' ' => { return go!(self: to BetweenDoctypePublicAndSystemIdentifiers); } @@ -1731,7 +1746,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ after-doctype-system-identifier-state states::AfterDoctypeIdentifier(System) => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '\t' | '\n' | '\x0C' | ' ' => (), '>' => { go!(self: emit_doctype); @@ -1746,7 +1761,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ between-doctype-public-and-system-identifiers-state states::BetweenDoctypePublicAndSystemIdentifiers => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '\t' | '\n' | '\x0C' | ' ' => (), '>' => { go!(self: emit_doctype); @@ -1770,7 +1785,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ bogus-doctype-state states::BogusDoctype => loop { - match get_char!(self, input) { + match get_char!(self, input)? 
{ '>' => { go!(self: emit_doctype); return go!(self: to Data); @@ -1781,7 +1796,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ bogus-comment-state states::BogusComment => loop { - match get_char!(self, input) { + match get_char!(self, input)? { '>' => { go!(self: emit_comment); return go!(self: to Data); @@ -1797,12 +1812,12 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ markup-declaration-open-state states::MarkupDeclarationOpen => loop { - if eat_exact!(self, input, "--") { + if eat_exact!(self, input, "--")? { { go!(self: clear_comment); return go!(self: to CommentStart); }; - } else if eat!(self, input, "doctype") { + } else if eat!(self, input, "doctype")? { { return go!(self: to Doctype); }; @@ -1811,7 +1826,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { .sink .adjusted_current_node_present_but_not_in_html_namespace() { - if eat_exact!(self, input, "[CDATA[") { + if eat_exact!(self, input, "[CDATA[")? { { go!(self: clear_temp); return go!(self: to CdataSection); @@ -1827,7 +1842,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ cdata-section-state states::CdataSection => loop { - match get_char!(self, input) { + match get_char!(self, input)? { ']' => { return go!(self: to CdataSectionBracket); } @@ -1842,7 +1857,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { }, //§ cdata-section-bracket - states::CdataSectionBracket => match get_char!(self, input) { + states::CdataSectionBracket => match get_char!(self, input)? { ']' => { return go!(self: to CdataSectionEnd); } @@ -1855,7 +1870,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { //§ cdata-section-end states::CdataSectionEnd => loop { - match get_char!(self, input) { + match get_char!(self, input)? { ']' => { go!(self: push_temp ']'); } |