summary | refs | log | tree | commit | diff
path: root/src/tokenizer
diff options
context:
space:
mode:
Diffstat (limited to 'src/tokenizer')
-rw-r--r-- src/tokenizer/mod.rs 141
1 files changed, 78 insertions, 63 deletions
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index 02f7963..3f637aa 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -246,7 +246,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
if self.ignore_lf {
self.ignore_lf = false;
if c == '\n' {
- c = unwrap_or_return!(input.next(), None);
+ c = input.next()?;
}
}
@@ -633,23 +633,38 @@ macro_rules! go_match ( ( $me:ident : $x:expr, $($pats:pat),+ => $($cmds:tt)* )
// This is a macro because it can cause early return
// from the function where it is used.
macro_rules! get_char ( ($me:expr, $input:expr) => (
- unwrap_or_return!($me.get_char($input), ControlFlow::Break(ProcessResult::Suspend))
+ match $me.get_char($input) {
+ Some(char) => ControlFlow::Continue(char),
+ None => ControlFlow::Break(ProcessResult::Suspend)
+ }
));
macro_rules! peek ( ($me:expr, $input:expr) => (
- unwrap_or_return!($me.peek($input), ControlFlow::Break(ProcessResult::Suspend))
+ match $me.peek($input) {
+ Some(char) => ControlFlow::Continue(char),
+ None => ControlFlow::Break(ProcessResult::Suspend)
+ }
));
macro_rules! pop_except_from ( ($me:expr, $input:expr, $set:expr) => (
- unwrap_or_return!($me.pop_except_from($input, $set), ControlFlow::Break(ProcessResult::Suspend))
+ match $me.pop_except_from($input, $set) {
+ Some(char) => ControlFlow::Continue(char),
+ None => ControlFlow::Break(ProcessResult::Suspend)
+ }
));
macro_rules! eat ( ($me:expr, $input:expr, $pat:expr) => (
- unwrap_or_return!($me.eat($input, $pat, u8::eq_ignore_ascii_case), ControlFlow::Break(ProcessResult::Suspend))
+ match $me.eat($input, $pat, u8::eq_ignore_ascii_case) {
+ Some(char) => ControlFlow::Continue(char),
+ None => ControlFlow::Break(ProcessResult::Suspend)
+ }
));
macro_rules! eat_exact ( ($me:expr, $input:expr, $pat:expr) => (
- unwrap_or_return!($me.eat($input, $pat, u8::eq), ControlFlow::Break(ProcessResult::Suspend))
+ match $me.eat($input, $pat, u8::eq) {
+ Some(char) => ControlFlow::Continue(char),
+ None => ControlFlow::Break(ProcessResult::Suspend)
+ }
));
impl<Sink: TokenSink> Tokenizer<Sink> {
@@ -665,7 +680,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
match self.state {
//§ data-state
states::Data => loop {
- match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n')) {
+ match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n'))? {
FromSet('\0') => {
go!(self: error);
go!(self: emit '\0');
@@ -685,7 +700,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ rcdata-state
states::RawData(Rcdata) => loop {
- match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n')) {
+ match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n'))? {
FromSet('\0') => {
go!(self: error);
go!(self: emit '\u{fffd}');
@@ -705,7 +720,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ rawtext-state
states::RawData(Rawtext) => loop {
- match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n')) {
+ match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n'))? {
FromSet('\0') => {
go!(self: error);
go!(self: emit '\u{fffd}');
@@ -722,7 +737,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ script-data-state
states::RawData(ScriptData) => loop {
- match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n')) {
+ match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n'))? {
FromSet('\0') => {
go!(self: error);
go!(self: emit '\u{fffd}');
@@ -739,7 +754,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ script-data-escaped-state
states::RawData(ScriptDataEscaped(Escaped)) => loop {
- match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n')) {
+ match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n'))? {
FromSet('\0') => {
go!(self: error);
go!(self: emit '\u{fffd}');
@@ -760,7 +775,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ script-data-double-escaped-state
states::RawData(ScriptDataEscaped(DoubleEscaped)) => loop {
- match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n')) {
+ match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n'))? {
FromSet('\0') => {
go!(self: error);
go!(self: emit '\u{fffd}');
@@ -782,7 +797,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ plaintext-state
states::Plaintext => loop {
- match pop_except_from!(self, input, small_char_set!('\r' '\0' '\n')) {
+ match pop_except_from!(self, input, small_char_set!('\r' '\0' '\n'))? {
FromSet('\0') => {
go!(self: error);
go!(self: emit '\u{fffd}');
@@ -796,7 +811,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ tag-open-state
states::TagOpen => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'!' => {
go!(self: clear_temp);
return go!(self: to MarkupDeclarationOpen);
@@ -827,7 +842,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ end-tag-open-state
states::EndTagOpen => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'>' => {
go!(self: error);
return go!(self: to Data);
@@ -855,7 +870,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ tag-name-state
states::TagName => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => {
return go!(self: to BeforeAttributeName);
}
@@ -877,7 +892,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ script-data-escaped-less-than-sign-state
states::RawLessThanSign(ScriptDataEscaped(Escaped)) => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'/' => {
go!(self: clear_temp);
return go!(self: to RawEndTagOpen ScriptDataEscaped Escaped);
@@ -901,7 +916,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ script-data-double-escaped-less-than-sign-state
states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'/' => {
go!(self: clear_temp);
go!(self: emit '/');
@@ -917,7 +932,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ rcdata-less-than-sign-state rawtext-less-than-sign-state script-data-less-than-sign-state
// otherwise
states::RawLessThanSign(kind) => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'/' => {
go!(self: clear_temp);
return go!(self: to RawEndTagOpen kind);
@@ -937,7 +952,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ rcdata-end-tag-open-state rawtext-end-tag-open-state script-data-end-tag-open-state script-data-escaped-end-tag-open-state
states::RawEndTagOpen(kind) => loop {
- let c = get_char!(self, input);
+ let c = get_char!(self, input)?;
match lower_ascii_letter(c) {
Some(cl) => {
go!(self: create_tag EndTag cl);
@@ -955,7 +970,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ rcdata-end-tag-name-state rawtext-end-tag-name-state script-data-end-tag-name-state script-data-escaped-end-tag-name-state
states::RawEndTagName(kind) => loop {
- let c = get_char!(self, input);
+ let c = get_char!(self, input)?;
if self.have_appropriate_end_tag() {
match c {
'\t' | '\n' | '\x0C' | ' ' => {
@@ -989,7 +1004,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ script-data-double-escape-start-state
states::ScriptDataEscapeStart(DoubleEscaped) => loop {
- let c = get_char!(self, input);
+ let c = get_char!(self, input)?;
match c {
'\t' | '\n' | '\x0C' | ' ' | '/' | '>' => {
let esc = if &*self.temp_buf == "script" {
@@ -1017,7 +1032,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ script-data-escape-start-state
states::ScriptDataEscapeStart(Escaped) => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'-' => {
go!(self: emit '-');
return go!(self: to ScriptDataEscapeStartDash);
@@ -1031,7 +1046,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ script-data-escape-start-dash-state
states::ScriptDataEscapeStartDash => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'-' => {
go!(self: emit '-');
return go!(self: to ScriptDataEscapedDashDash Escaped);
@@ -1045,7 +1060,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ script-data-escaped-dash-state script-data-double-escaped-dash-state
states::ScriptDataEscapedDash(kind) => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'-' => {
go!(self: emit '-');
return go!(self: to ScriptDataEscapedDashDash kind);
@@ -1074,7 +1089,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ script-data-escaped-dash-dash-state script-data-double-escaped-dash-dash-state
states::ScriptDataEscapedDashDash(kind) => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'-' => {
go!(self: emit '-');
}
@@ -1106,7 +1121,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ script-data-double-escape-end-state
states::ScriptDataDoubleEscapeEnd => loop {
- let c = get_char!(self, input);
+ let c = get_char!(self, input)?;
match c {
'\t' | '\n' | '\x0C' | ' ' | '/' | '>' => {
let esc = if &*self.temp_buf == "script" {
@@ -1134,7 +1149,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ before-attribute-name-state
states::BeforeAttributeName => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => (),
'/' => {
return go!(self: to SelfClosingStartTag);
@@ -1166,7 +1181,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ attribute-name-state
states::AttributeName => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => {
return go!(self: to AfterAttributeName);
}
@@ -1200,7 +1215,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ after-attribute-name-state
states::AfterAttributeName => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => (),
'/' => {
return go!(self: to SelfClosingStartTag);
@@ -1237,7 +1252,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
// Use peek so we can handle the first attr character along with the rest,
// hopefully in the same zero-copy buffer.
states::BeforeAttributeValue => loop {
- match peek!(self, input) {
+ match peek!(self, input)? {
'\t' | '\n' | '\r' | '\x0C' | ' ' => {
go!(self: discard_char input);
}
@@ -1268,7 +1283,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ attribute-value-(double-quoted)-state
states::AttributeValue(DoubleQuoted) => loop {
- match pop_except_from!(self, input, small_char_set!('\r' '"' '&' '\0' '\n')) {
+ match pop_except_from!(self, input, small_char_set!('\r' '"' '&' '\0' '\n'))? {
FromSet('"') => {
return go!(self: to AfterAttributeValueQuoted);
}
@@ -1290,7 +1305,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ attribute-value-(single-quoted)-state
states::AttributeValue(SingleQuoted) => loop {
- match pop_except_from!(self, input, small_char_set!('\r' '\'' '&' '\0' '\n')) {
+ match pop_except_from!(self, input, small_char_set!('\r' '\'' '&' '\0' '\n'))? {
FromSet('\'') => {
return go!(self: to AfterAttributeValueQuoted);
}
@@ -1316,7 +1331,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
self,
input,
small_char_set!('\r' '\t' '\n' '\x0C' ' ' '&' '>' '\0')
- ) {
+ )? {
FromSet('\t') | FromSet('\n') | FromSet('\x0C') | FromSet(' ') => {
return go!(self: to BeforeAttributeName);
}
@@ -1345,7 +1360,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ after-attribute-value-(quoted)-state
states::AfterAttributeValueQuoted => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => {
return go!(self: to BeforeAttributeName);
}
@@ -1365,7 +1380,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ self-closing-start-tag-state
states::SelfClosingStartTag => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'>' => {
self.current_tag_self_closing = true;
{
@@ -1382,7 +1397,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ comment-start-state
states::CommentStart => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'-' => {
return go!(self: to CommentStartDash);
}
@@ -1405,7 +1420,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ comment-start-dash-state
states::CommentStartDash => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'-' => {
return go!(self: to CommentEnd);
}
@@ -1429,7 +1444,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ comment-state
states::Comment => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'-' => {
return go!(self: to CommentEndDash);
}
@@ -1445,7 +1460,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ comment-end-dash-state
states::CommentEndDash => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'-' => {
return go!(self: to CommentEnd);
}
@@ -1464,7 +1479,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ comment-end-state
states::CommentEnd => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'>' => {
go!(self: emit_comment);
return go!(self: to Data);
@@ -1493,7 +1508,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ comment-end-bang-state
states::CommentEndBang => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'-' => {
go!(self: append_comment "--!");
return go!(self: to CommentEndDash);
@@ -1517,7 +1532,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ doctype-state
states::Doctype => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => {
return go!(self: to BeforeDoctypeName);
}
@@ -1531,7 +1546,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ before-doctype-name-state
states::BeforeDoctypeName => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => (),
'\0' => {
go!(self: error);
@@ -1556,7 +1571,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ doctype-name-state
states::DoctypeName => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => {
go!(self: clear_temp);
return go!(self: to AfterDoctypeName);
@@ -1577,16 +1592,16 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ after-doctype-name-state
states::AfterDoctypeName => loop {
- if eat!(self, input, "public") {
+ if eat!(self, input, "public")? {
{
return go!(self: to AfterDoctypeKeyword Public);
};
- } else if eat!(self, input, "system") {
+ } else if eat!(self, input, "system")? {
{
return go!(self: to AfterDoctypeKeyword System);
};
} else {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => (),
'>' => {
go!(self: emit_doctype);
@@ -1603,7 +1618,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ after-doctype-public-keyword-state after-doctype-system-keyword-state
states::AfterDoctypeKeyword(kind) => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => {
return go!(self: to BeforeDoctypeIdentifier kind);
}
@@ -1633,7 +1648,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ before-doctype-public-identifier-state before-doctype-system-identifier-state
states::BeforeDoctypeIdentifier(kind) => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => (),
'"' => {
go!(self: clear_doctype_id kind);
@@ -1659,7 +1674,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ doctype-public-identifier-(double-quoted)-state doctype-system-identifier-(double-quoted)-state
states::DoctypeIdentifierDoubleQuoted(kind) => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'"' => {
return go!(self: to AfterDoctypeIdentifier kind);
}
@@ -1681,7 +1696,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ doctype-public-identifier-(single-quoted)-state doctype-system-identifier-(single-quoted)-state
states::DoctypeIdentifierSingleQuoted(kind) => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'\'' => {
return go!(self: to AfterDoctypeIdentifier kind);
}
@@ -1703,7 +1718,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ after-doctype-public-identifier-state
states::AfterDoctypeIdentifier(Public) => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => {
return go!(self: to BetweenDoctypePublicAndSystemIdentifiers);
}
@@ -1731,7 +1746,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ after-doctype-system-identifier-state
states::AfterDoctypeIdentifier(System) => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => (),
'>' => {
go!(self: emit_doctype);
@@ -1746,7 +1761,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ between-doctype-public-and-system-identifiers-state
states::BetweenDoctypePublicAndSystemIdentifiers => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'\t' | '\n' | '\x0C' | ' ' => (),
'>' => {
go!(self: emit_doctype);
@@ -1770,7 +1785,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ bogus-doctype-state
states::BogusDoctype => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'>' => {
go!(self: emit_doctype);
return go!(self: to Data);
@@ -1781,7 +1796,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ bogus-comment-state
states::BogusComment => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
'>' => {
go!(self: emit_comment);
return go!(self: to Data);
@@ -1797,12 +1812,12 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ markup-declaration-open-state
states::MarkupDeclarationOpen => loop {
- if eat_exact!(self, input, "--") {
+ if eat_exact!(self, input, "--")? {
{
go!(self: clear_comment);
return go!(self: to CommentStart);
};
- } else if eat!(self, input, "doctype") {
+ } else if eat!(self, input, "doctype")? {
{
return go!(self: to Doctype);
};
@@ -1811,7 +1826,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
.sink
.adjusted_current_node_present_but_not_in_html_namespace()
{
- if eat_exact!(self, input, "[CDATA[") {
+ if eat_exact!(self, input, "[CDATA[")? {
{
go!(self: clear_temp);
return go!(self: to CdataSection);
@@ -1827,7 +1842,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ cdata-section-state
states::CdataSection => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
']' => {
return go!(self: to CdataSectionBracket);
}
@@ -1842,7 +1857,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
},
//§ cdata-section-bracket
- states::CdataSectionBracket => match get_char!(self, input) {
+ states::CdataSectionBracket => match get_char!(self, input)? {
']' => {
return go!(self: to CdataSectionEnd);
}
@@ -1855,7 +1870,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ cdata-section-end
states::CdataSectionEnd => loop {
- match get_char!(self, input) {
+ match get_char!(self, input)? {
']' => {
go!(self: push_temp ']');
}