about summary refs log tree commit diff
path: root/src/lib.rs
diff options
context:
space:
mode:
author Markus Unterwaditzer <markus@unterwaditzer.net> 2021-11-26 18:38:58 +0100
committer GitHub <noreply@github.com> 2021-11-26 18:38:58 +0100
commit dced8066f77f570dd3e396ec3570c71aa86c454e (patch)
tree 2fb81d5f411319948c135dfaaa78bc5da6783621 /src/lib.rs
parent 029c13426634e58adb3996c9a5e4d79c3e8437f2 (diff)
Read html from io::BufRead (#8)
Diffstat (limited to 'src/lib.rs')
-rw-r--r-- src/lib.rs 895
1 files changed, 464 insertions, 431 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 361ccd0..7722197 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,6 +7,7 @@ mod emitter;
mod entities;
mod error;
mod machine;
+mod never;
mod reader;
#[cfg(feature = "integration-tests")]
@@ -19,9 +20,9 @@ use machine::{
};
pub use emitter::{DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token};
-pub use reader::{Readable, Reader, StringReader};
-
pub use error::Error;
+pub use never::Never;
+pub use reader::{BufReadReader, Readable, Reader, StringReader};
macro_rules! ctostr {
($c:expr) => {
@@ -152,15 +153,20 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
}
}
- fn read_char(&mut self) -> Option<char> {
- let (mut c, reconsumed) = match self.to_reconsume.pop() {
- Some(c) => (c?, true),
- None => (self.reader.read_char()?, false),
+ fn read_char(&mut self) -> Result<Option<char>, R::Error> {
+ let (c_res, reconsumed) = match self.to_reconsume.pop() {
+ Some(c) => (Ok(c), true),
+ None => (self.reader.read_char(), false),
+ };
+
+ let mut c = match c_res {
+ Ok(Some(c)) => c,
+ res => return res,
};
if c == '\r' {
c = '\n';
- let c2 = self.reader.read_char();
+ let c2 = self.reader.read_char()?;
if c2 != Some('\n') {
self.unread_char(c2);
}
@@ -170,11 +176,11 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.validate_char(c);
}
- Some(c)
+ Ok(Some(c))
}
#[inline]
- fn try_read_string(&mut self, s: &str, case_sensitive: bool) -> bool {
+ fn try_read_string(&mut self, s: &str, case_sensitive: bool) -> Result<bool, R::Error> {
debug_assert!(!s.is_empty());
debug_assert!(self.to_reconsume.is_empty());
self.reader.try_read_string(s, case_sensitive)
@@ -200,10 +206,10 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
}
}
- fn next_input_character(&mut self) -> Option<char> {
- let rv = self.read_char();
+ fn next_input_character(&mut self) -> Result<Option<char>, R::Error> {
+ let rv = self.read_char()?;
self.unread_char(rv);
- rv
+ Ok(rv)
}
fn flush_buffer_characters(&mut self) {
@@ -211,7 +217,7 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.temporary_buffer.clear();
}
- fn consume(&mut self) -> ControlToken {
+ fn consume(&mut self) -> Result<ControlToken, R::Error> {
macro_rules! mutate_character_reference {
(* $mul:literal + $x:ident - $sub:literal) => {
match self
@@ -229,106 +235,106 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
}
match self.state {
- State::Data => match self.read_char() {
+ State::Data => match self.read_char()? {
Some('&') => {
self.return_state = Some(self.state);
self.state = State::CharacterReference;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('<') => {
self.state = State::TagOpen;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.emit_string("\0");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) => {
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
- None => ControlToken::Eof,
+ None => Ok(ControlToken::Eof),
},
- State::RcData => match self.read_char() {
+ State::RcData => match self.read_char()? {
Some('&') => {
self.return_state = Some(State::RcData);
self.state = State::CharacterReference;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('<') => {
self.state = State::RcDataLessThanSign;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.emit_string("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) => {
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
- None => ControlToken::Eof,
+ None => Ok(ControlToken::Eof),
},
- State::RawText => match self.read_char() {
+ State::RawText => match self.read_char()? {
Some('<') => {
self.state = State::RawTextLessThanSign;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.emit_string("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) => {
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
- None => ControlToken::Eof,
+ None => Ok(ControlToken::Eof),
},
- State::ScriptData => match self.read_char() {
+ State::ScriptData => match self.read_char()? {
Some('<') => {
self.state = State::ScriptDataLessThanSign;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.emit_string("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) => {
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
- None => ControlToken::Eof,
+ None => Ok(ControlToken::Eof),
},
- State::PlainText => match self.read_char() {
+ State::PlainText => match self.read_char()? {
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.emit_string("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) => {
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
- None => ControlToken::Eof,
+ None => Ok(ControlToken::Eof),
},
- State::TagOpen => match self.read_char() {
+ State::TagOpen => match self.read_char()? {
Some('!') => {
self.state = State::MarkupDeclarationOpen;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('/') => {
self.state = State::EndTagOpen;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
self.emitter.init_start_tag();
self.state = State::TagName;
self.unread_char(Some(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c @ Some('?') => {
self.emitter
@@ -336,12 +342,12 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.init_comment();
self.state = State::BogusComment;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofBeforeTagName);
self.emitter.emit_string("<");
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
c @ Some(_) => {
self.emitter
@@ -349,25 +355,25 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.state = State::Data;
self.emitter.emit_string("<");
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::EndTagOpen => match self.read_char() {
+ State::EndTagOpen => match self.read_char()? {
Some(x) if x.is_ascii_alphabetic() => {
self.emitter.init_end_tag();
self.state = State::TagName;
self.unread_char(Some(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.emitter.emit_error(Error::MissingEndTagName);
self.state = State::Data;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofBeforeTagName);
self.emitter.emit_string("</");
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter
@@ -375,82 +381,82 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.init_comment();
self.state = State::BogusComment;
self.unread_char(Some(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::TagName => match self.read_char() {
+ State::TagName => match self.read_char()? {
Some(whitespace_pat!()) => {
self.state = State::BeforeAttributeName;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('/') => {
self.state = State::SelfClosingStartTag;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.state = State::Data;
self.emitter.emit_current_tag();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.push_tag_name("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) => {
self.emitter.push_tag_name(ctostr!(x.to_ascii_lowercase()));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInTag);
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
},
- State::RcDataLessThanSign => match self.read_char() {
+ State::RcDataLessThanSign => match self.read_char()? {
Some('/') => {
self.temporary_buffer.clear();
self.state = State::RcDataEndTagOpen;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_string("<");
self.state = State::RcData;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::RcDataEndTagOpen => match self.read_char() {
+ State::RcDataEndTagOpen => match self.read_char()? {
Some(x) if x.is_ascii_alphabetic() => {
self.emitter.init_end_tag();
self.state = State::RcDataEndTagName;
self.unread_char(Some(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_string("</");
self.state = State::RcData;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::RcDataEndTagName => match self.read_char() {
+ State::RcDataEndTagName => match self.read_char()? {
Some(whitespace_pat!()) if self.emitter.current_is_appropriate_end_tag_token() => {
self.state = State::BeforeAttributeName;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('/') if self.emitter.current_is_appropriate_end_tag_token() => {
self.state = State::SelfClosingStartTag;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') if self.emitter.current_is_appropriate_end_tag_token() => {
self.state = State::Data;
self.emitter.emit_current_tag();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
self.emitter.push_tag_name(ctostr!(x.to_ascii_lowercase()));
self.temporary_buffer.push(x);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_string("</");
@@ -458,54 +464,54 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.state = State::RcData;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::RawTextLessThanSign => match self.read_char() {
+ State::RawTextLessThanSign => match self.read_char()? {
Some('/') => {
self.temporary_buffer.clear();
self.state = State::RawTextEndTagOpen;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_string("<");
self.state = State::RawText;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::RawTextEndTagOpen => match self.read_char() {
+ State::RawTextEndTagOpen => match self.read_char()? {
Some(x) if x.is_ascii_alphabetic() => {
self.emitter.init_end_tag();
self.state = State::RawTextEndTagName;
self.unread_char(Some(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_string("</");
self.state = State::RawText;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::RawTextEndTagName => match self.read_char() {
+ State::RawTextEndTagName => match self.read_char()? {
Some(whitespace_pat!()) if self.emitter.current_is_appropriate_end_tag_token() => {
self.state = State::BeforeAttributeName;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('/') if self.emitter.current_is_appropriate_end_tag_token() => {
self.state = State::SelfClosingStartTag;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') if self.emitter.current_is_appropriate_end_tag_token() => {
self.state = State::Data;
self.emitter.emit_current_tag();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
self.emitter.push_tag_name(ctostr!(x.to_ascii_lowercase()));
self.temporary_buffer.push(x);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_string("</");
@@ -513,237 +519,237 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.state = State::RawText;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataLessThanSign => match self.read_char() {
+ State::ScriptDataLessThanSign => match self.read_char()? {
Some('/') => {
self.temporary_buffer.clear();
self.state = State::ScriptDataEndTagOpen;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('!') => {
self.state = State::ScriptDataEscapeStart;
self.emitter.emit_string("<!");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_string("<");
self.state = State::Data;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataEndTagOpen => match self.read_char() {
+ State::ScriptDataEndTagOpen => match self.read_char()? {
Some(x) if x.is_ascii_alphabetic() => {
self.emitter.init_end_tag();
self.state = State::ScriptDataEndTagName;
self.unread_char(Some(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_string("</");
self.state = State::ScriptData;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataEndTagName => match self.read_char() {
+ State::ScriptDataEndTagName => match self.read_char()? {
Some(whitespace_pat!()) if self.emitter.current_is_appropriate_end_tag_token() => {
self.state = State::BeforeAttributeName;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('/') if self.emitter.current_is_appropriate_end_tag_token() => {
self.state = State::SelfClosingStartTag;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') if self.emitter.current_is_appropriate_end_tag_token() => {
self.state = State::Data;
self.emitter.emit_current_tag();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
self.emitter.push_tag_name(ctostr!(x.to_ascii_lowercase()));
self.temporary_buffer.push(x.to_ascii_lowercase());
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_string("</");
self.flush_buffer_characters();
self.state = State::Data;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataEscapeStart => match self.read_char() {
+ State::ScriptDataEscapeStart => match self.read_char()? {
Some('-') => {
self.state = State::ScriptDataEscapeStartDash;
self.emitter.emit_string("-");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.state = State::ScriptData;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataEscapeStartDash => match self.read_char() {
+ State::ScriptDataEscapeStartDash => match self.read_char()? {
Some('-') => {
self.state = State::ScriptDataEscapedDashDash;
self.emitter.emit_string("-");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.state = State::ScriptData;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataEscaped => match self.read_char() {
+ State::ScriptDataEscaped => match self.read_char()? {
Some('-') => {
self.state = State::ScriptDataEscapedDash;
self.emitter.emit_string("-");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('<') => {
self.state = State::ScriptDataEscapedLessThanSign;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.emit_string("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter
.emit_error(Error::EofInScriptHtmlCommentLikeText);
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataEscapedDash => match self.read_char() {
+ State::ScriptDataEscapedDash => match self.read_char()? {
Some('-') => {
self.state = State::ScriptDataEscapedDashDash;
self.emitter.emit_string("-");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('<') => {
self.state = State::ScriptDataEscapedLessThanSign;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.state = State::ScriptDataEscaped;
self.emitter.emit_string("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter
.emit_error(Error::EofInScriptHtmlCommentLikeText);
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.state = State::ScriptDataEscaped;
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataEscapedDashDash => match self.read_char() {
+ State::ScriptDataEscapedDashDash => match self.read_char()? {
Some('-') => {
self.emitter.emit_string("-");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('<') => {
self.state = State::ScriptDataEscapedLessThanSign;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.state = State::ScriptData;
self.emitter.emit_string(">");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.state = State::ScriptDataEscaped;
self.emitter.emit_string("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter
.emit_error(Error::EofInScriptHtmlCommentLikeText);
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.state = State::ScriptDataEscaped;
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataEscapedLessThanSign => match self.read_char() {
+ State::ScriptDataEscapedLessThanSign => match self.read_char()? {
Some('/') => {
self.temporary_buffer.clear();
self.state = State::ScriptDataEscapedEndTagOpen;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
self.temporary_buffer.clear();
self.emitter.emit_string("<");
self.state = State::ScriptDataDoubleEscapeStart;
self.unread_char(Some(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_string("<");
self.state = State::ScriptDataEscaped;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataEscapedEndTagOpen => match self.read_char() {
+ State::ScriptDataEscapedEndTagOpen => match self.read_char()? {
Some(x) if x.is_ascii_alphabetic() => {
self.emitter.init_end_tag();
self.state = State::ScriptDataEscapedEndTagName;
self.unread_char(Some(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_string("</");
self.unread_char(c);
self.state = State::ScriptDataEscaped;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataEscapedEndTagName => match self.read_char() {
+ State::ScriptDataEscapedEndTagName => match self.read_char()? {
Some(whitespace_pat!()) if self.emitter.current_is_appropriate_end_tag_token() => {
self.state = State::BeforeAttributeName;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('/') if self.emitter.current_is_appropriate_end_tag_token() => {
self.state = State::SelfClosingStartTag;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') if self.emitter.current_is_appropriate_end_tag_token() => {
self.state = State::Data;
self.emitter.emit_current_tag();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
self.emitter.push_tag_name(ctostr!(x.to_ascii_lowercase()));
self.temporary_buffer.push(x);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_string("</");
self.flush_buffer_characters();
self.state = State::ScriptDataEscaped;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataDoubleEscapeStart => match self.read_char() {
+ State::ScriptDataDoubleEscapeStart => match self.read_char()? {
Some(x @ whitespace_pat!() | x @ '/' | x @ '>') => {
if self.temporary_buffer == "script" {
self.state = State::ScriptDataDoubleEscaped;
@@ -751,119 +757,119 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.state = State::ScriptDataEscaped;
}
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
self.temporary_buffer.push(x.to_ascii_lowercase());
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.state = State::ScriptDataEscaped;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataDoubleEscaped => match self.read_char() {
+ State::ScriptDataDoubleEscaped => match self.read_char()? {
Some('-') => {
self.state = State::ScriptDataDoubleEscapedDash;
self.emitter.emit_string("-");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('<') => {
self.state = State::ScriptDataDoubleEscapedLessThanSign;
self.emitter.emit_string("<");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.emit_string("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter
.emit_error(Error::EofInScriptHtmlCommentLikeText);
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataDoubleEscapedDash => match self.read_char() {
+ State::ScriptDataDoubleEscapedDash => match self.read_char()? {
Some('-') => {
self.state = State::ScriptDataDoubleEscapedDashDash;
self.emitter.emit_string("-");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('<') => {
self.state = State::ScriptDataDoubleEscapedLessThanSign;
self.emitter.emit_string("<");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.state = State::ScriptDataDoubleEscaped;
self.emitter.emit_string("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter
.emit_error(Error::EofInScriptHtmlCommentLikeText);
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.state = State::ScriptDataDoubleEscaped;
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataDoubleEscapedDashDash => match self.read_char() {
+ State::ScriptDataDoubleEscapedDashDash => match self.read_char()? {
Some('-') => {
self.emitter.emit_string("-");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('<') => {
self.emitter.emit_string("<");
self.state = State::ScriptDataDoubleEscapedLessThanSign;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.emitter.emit_string(">");
self.state = State::ScriptData;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.state = State::ScriptDataDoubleEscaped;
self.emitter.emit_string("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter
.emit_error(Error::EofInScriptHtmlCommentLikeText);
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.state = State::ScriptDataDoubleEscaped;
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataDoubleEscapedLessThanSign => match self.read_char() {
+ State::ScriptDataDoubleEscapedLessThanSign => match self.read_char()? {
Some('/') => {
self.temporary_buffer.clear();
self.state = State::ScriptDataDoubleEscapeEnd;
self.emitter.emit_string("/");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.state = State::ScriptDataDoubleEscaped;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::ScriptDataDoubleEscapeEnd => match self.read_char() {
+ State::ScriptDataDoubleEscapeEnd => match self.read_char()? {
Some(x @ whitespace_pat!() | x @ '/' | x @ '>') => {
if self.temporary_buffer == "script" {
self.state = State::ScriptDataEscaped;
@@ -872,25 +878,25 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
}
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
self.temporary_buffer.push(x.to_ascii_lowercase());
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.state = State::ScriptDataDoubleEscaped;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::BeforeAttributeName => match self.read_char() {
- Some(whitespace_pat!()) => ControlToken::Continue,
+ State::BeforeAttributeName => match self.read_char()? {
+ Some(whitespace_pat!()) => Ok(ControlToken::Continue),
c @ Some('/' | '>') | c @ None => {
self.state = State::AfterAttributeName;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('=') => {
self.emitter
@@ -898,249 +904,249 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.init_attribute();
self.emitter.push_attribute_name("=");
self.state = State::AttributeName;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) => {
self.emitter.init_attribute();
self.state = State::AttributeName;
self.unread_char(Some(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::AttributeName => match self.read_char() {
+ State::AttributeName => match self.read_char()? {
c @ Some(whitespace_pat!() | '/' | '>') | c @ None => {
self.state = State::AfterAttributeName;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('=') => {
self.state = State::BeforeAttributeValue;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.push_attribute_name("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x @ '"' | x @ '\'' | x @ '<') => {
self.emitter
.emit_error(Error::UnexpectedCharacterInAttributeName);
self.emitter
.push_attribute_name(ctostr!(x.to_ascii_lowercase()));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) => {
self.emitter
.push_attribute_name(ctostr!(x.to_ascii_lowercase()));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::AfterAttributeName => match self.read_char() {
- Some(whitespace_pat!()) => ControlToken::Continue,
+ State::AfterAttributeName => match self.read_char()? {
+ Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('/') => {
self.state = State::SelfClosingStartTag;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('=') => {
self.state = State::BeforeAttributeValue;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.state = State::Data;
self.emitter.emit_current_tag();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInTag);
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter.init_attribute();
self.state = State::AttributeName;
self.unread_char(Some(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::BeforeAttributeValue => match self.read_char() {
- Some(whitespace_pat!()) => ControlToken::Continue,
+ State::BeforeAttributeValue => match self.read_char()? {
+ Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('"') => {
self.state = State::AttributeValueDoubleQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\'') => {
self.state = State::AttributeValueSingleQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.emitter.emit_error(Error::MissingAttributeValue);
self.state = State::Data;
self.emitter.emit_current_tag();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.state = State::AttributeValueUnquoted;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::AttributeValueDoubleQuoted => match self.read_char() {
+ State::AttributeValueDoubleQuoted => match self.read_char()? {
Some('"') => {
self.state = State::AfterAttributeValueQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('&') => {
self.return_state = Some(State::AttributeValueDoubleQuoted);
self.state = State::CharacterReference;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.push_attribute_value("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInTag);
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter.push_attribute_value(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::AttributeValueSingleQuoted => match self.read_char() {
+ State::AttributeValueSingleQuoted => match self.read_char()? {
Some('\'') => {
self.state = State::AfterAttributeValueQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('&') => {
self.return_state = Some(State::AttributeValueSingleQuoted);
self.state = State::CharacterReference;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.push_attribute_value("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInTag);
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter.push_attribute_value(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::AttributeValueUnquoted => match self.read_char() {
+ State::AttributeValueUnquoted => match self.read_char()? {
Some(whitespace_pat!()) => {
self.state = State::BeforeAttributeName;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('&') => {
self.return_state = Some(State::AttributeValueUnquoted);
self.state = State::CharacterReference;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.state = State::Data;
self.emitter.emit_current_tag();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.push_attribute_value("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x @ '"' | x @ '\'' | x @ '<' | x @ '=' | x @ '\u{60}') => {
self.emitter
.emit_error(Error::UnexpectedCharacterInUnquotedAttributeValue);
self.emitter.push_attribute_value(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInTag);
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter.push_attribute_value(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::AfterAttributeValueQuoted => match self.read_char() {
+ State::AfterAttributeValueQuoted => match self.read_char()? {
Some(whitespace_pat!()) => {
self.state = State::BeforeAttributeName;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('/') => {
self.state = State::SelfClosingStartTag;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.state = State::Data;
self.emitter.emit_current_tag();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInTag);
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter
.emit_error(Error::MissingWhitespaceBetweenAttributes);
self.state = State::BeforeAttributeName;
self.unread_char(Some(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::SelfClosingStartTag => match self.read_char() {
+ State::SelfClosingStartTag => match self.read_char()? {
Some('>') => {
self.emitter.set_self_closing();
self.state = State::Data;
self.emitter.emit_current_tag();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInTag);
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter.emit_error(Error::UnexpectedSolidusInTag);
self.state = State::BeforeAttributeName;
self.unread_char(Some(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::BogusComment => match self.read_char() {
+ State::BogusComment => match self.read_char()? {
Some('>') => {
self.state = State::Data;
self.emitter.emit_current_comment();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_current_comment();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.push_comment("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x) => {
self.emitter.push_comment(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::MarkupDeclarationOpen => match self.read_char() {
- Some('-') if self.try_read_string("-", true) => {
+ State::MarkupDeclarationOpen => match self.read_char()? {
+ Some('-') if self.try_read_string("-", true)? => {
self.emitter.init_comment();
self.state = State::CommentStart;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
- Some('d' | 'D') if self.try_read_string("octype", false) => {
+ Some('d' | 'D') if self.try_read_string("octype", false)? => {
self.state = State::Doctype;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
- Some('[') if self.try_read_string("CDATA[", true) => {
+ Some('[') if self.try_read_string("CDATA[", true)? => {
// missing: check for adjusted current element: we don't have an element stack
// at all
//
@@ -1152,194 +1158,194 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.init_comment();
self.emitter.push_comment("[CDATA[");
self.state = State::BogusComment;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_error(Error::IncorrectlyOpenedComment);
self.emitter.init_comment();
self.state = State::BogusComment;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::CommentStart => match self.read_char() {
+ State::CommentStart => match self.read_char()? {
Some('-') => {
self.state = State::CommentStartDash;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.emitter.emit_error(Error::AbruptClosingOfEmptyComment);
self.state = State::Data;
self.emitter.emit_current_comment();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.unread_char(c);
self.state = State::Comment;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::CommentStartDash => match self.read_char() {
+ State::CommentStartDash => match self.read_char()? {
Some('-') => {
self.state = State::CommentEnd;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.emitter.emit_error(Error::AbruptClosingOfEmptyComment);
self.state = State::Data;
self.emitter.emit_current_comment();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInComment);
self.emitter.emit_current_comment();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
c @ Some(_) => {
self.emitter.push_comment("-");
self.unread_char(c);
self.state = State::Comment;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::Comment => match self.read_char() {
+ State::Comment => match self.read_char()? {
Some('<') => {
self.emitter.push_comment("<");
self.state = State::CommentLessThanSign;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('-') => {
self.state = State::CommentEndDash;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.push_comment("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInComment);
self.emitter.emit_current_comment();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter.push_comment(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::CommentLessThanSign => match self.read_char() {
+ State::CommentLessThanSign => match self.read_char()? {
Some('!') => {
self.emitter.push_comment("!");
self.state = State::CommentLessThanSignBang;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('<') => {
self.emitter.push_comment("<");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.unread_char(c);
self.state = State::Comment;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::CommentLessThanSignBang => match self.read_char() {
+ State::CommentLessThanSignBang => match self.read_char()? {
Some('-') => {
self.state = State::CommentLessThanSignBangDash;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.unread_char(c);
self.state = State::Comment;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::CommentLessThanSignBangDash => match self.read_char() {
+ State::CommentLessThanSignBangDash => match self.read_char()? {
Some('-') => {
self.state = State::CommentLessThanSignBangDashDash;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.unread_char(c);
self.state = State::CommentEndDash;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::CommentLessThanSignBangDashDash => match self.read_char() {
+ State::CommentLessThanSignBangDashDash => match self.read_char()? {
c @ Some('>') | c @ None => {
self.unread_char(c);
self.state = State::CommentEnd;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_error(Error::NestedComment);
self.unread_char(c);
self.state = State::CommentEnd;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::CommentEndDash => match self.read_char() {
+ State::CommentEndDash => match self.read_char()? {
Some('-') => {
self.state = State::CommentEnd;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInComment);
self.emitter.emit_current_comment();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
c => {
self.emitter.push_comment("-");
self.unread_char(c);
self.state = State::Comment;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::CommentEnd => match self.read_char() {
+ State::CommentEnd => match self.read_char()? {
Some('>') => {
self.state = State::Data;
self.emitter.emit_current_comment();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('!') => {
self.state = State::CommentEndBang;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('-') => {
self.emitter.push_comment("-");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInComment);
self.emitter.emit_current_comment();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
c @ Some(_) => {
self.emitter.push_comment("-");
self.emitter.push_comment("-");
self.unread_char(c);
self.state = State::Comment;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::CommentEndBang => match self.read_char() {
+ State::CommentEndBang => match self.read_char()? {
Some('-') => {
self.emitter.push_comment("-");
self.emitter.push_comment("-");
self.emitter.push_comment("!");
self.state = State::CommentEndDash;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.emitter.emit_error(Error::IncorrectlyClosedComment);
self.state = State::Data;
self.emitter.emit_current_comment();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInComment);
self.emitter.emit_current_comment();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
c @ Some(_) => {
self.emitter.push_comment("-");
@@ -1347,42 +1353,42 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.push_comment("!");
self.state = State::Comment;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::Doctype => match self.read_char() {
+ State::Doctype => match self.read_char()? {
Some(whitespace_pat!()) => {
self.state = State::BeforeDoctypeName;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c @ Some('>') => {
self.unread_char(c);
self.state = State::BeforeDoctypeName;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInDoctype);
self.emitter.init_doctype();
self.emitter.set_force_quirks();
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
c @ Some(_) => {
self.emitter
.emit_error(Error::MissingWhitespaceBeforeDoctypeName);
self.unread_char(c);
self.state = State::BeforeDoctypeName;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::BeforeDoctypeName => match self.read_char() {
- Some(whitespace_pat!()) => ControlToken::Continue,
+ State::BeforeDoctypeName => match self.read_char()? {
+ Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.init_doctype();
self.emitter.push_doctype_name("\u{fffd}");
self.state = State::DoctypeName;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.emitter.emit_error(Error::MissingDoctypeName);
@@ -1390,70 +1396,70 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.state = State::Data;
self.emitter.emit_current_doctype();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInDoctype);
self.emitter.init_doctype();
self.emitter.set_force_quirks();
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter.init_doctype();
self.emitter
.push_doctype_name(ctostr!(x.to_ascii_lowercase()));
self.state = State::DoctypeName;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::DoctypeName => match self.read_char() {
+ State::DoctypeName => match self.read_char()? {
Some(whitespace_pat!()) => {
self.state = State::AfterDoctypeName;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.state = State::Data;
self.emitter.emit_current_doctype();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.push_doctype_name("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInDoctype);
self.emitter.set_force_quirks();
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter
.push_doctype_name(ctostr!(x.to_ascii_lowercase()));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::AfterDoctypeName => match self.read_char() {
- Some(whitespace_pat!()) => ControlToken::Continue,
+ State::AfterDoctypeName => match self.read_char()? {
+ Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('>') => {
self.state = State::Data;
self.emitter.emit_current_doctype();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInDoctype);
self.emitter.set_force_quirks();
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
- Some('p' | 'P') if self.try_read_string("ublic", false) => {
+ Some('p' | 'P') if self.try_read_string("ublic", false)? => {
self.state = State::AfterDoctypePublicKeyword;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
- Some('s' | 'S') if self.try_read_string("ystem", false) => {
+ Some('s' | 'S') if self.try_read_string("ystem", false)? => {
self.state = State::AfterDoctypeSystemKeyword;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c @ Some(_) => {
self.emitter
@@ -1461,27 +1467,27 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.unread_char(c);
self.state = State::BogusDoctype;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::AfterDoctypePublicKeyword => match self.read_char() {
+ State::AfterDoctypePublicKeyword => match self.read_char()? {
Some(whitespace_pat!()) => {
self.state = State::BeforeDoctypePublicIdentifier;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('"') => {
self.emitter
.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);
self.emitter.set_doctype_public_identifier("");
self.state = State::DoctypePublicIdentifierDoubleQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\'') => {
self.emitter
.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);
self.emitter.set_doctype_public_identifier("");
self.state = State::DoctypePublicIdentifierSingleQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.emitter
@@ -1489,13 +1495,13 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.state = State::Data;
self.emitter.emit_current_doctype();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInDoctype);
self.emitter.set_force_quirks();
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
c @ Some(_) => {
self.emitter
@@ -1503,20 +1509,20 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.unread_char(c);
self.state = State::BogusDoctype;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::BeforeDoctypePublicIdentifier => match self.read_char() {
- Some(whitespace_pat!()) => ControlToken::Continue,
+ State::BeforeDoctypePublicIdentifier => match self.read_char()? {
+ Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('"') => {
self.emitter.set_doctype_public_identifier("");
self.state = State::DoctypePublicIdentifierDoubleQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\'') => {
self.emitter.set_doctype_public_identifier("");
self.state = State::DoctypePublicIdentifierSingleQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.emitter
@@ -1524,13 +1530,13 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.state = State::Data;
self.emitter.emit_current_doctype();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInDoctype);
self.emitter.set_force_quirks();
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
c @ Some(_) => {
self.emitter
@@ -1538,18 +1544,18 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.unread_char(c);
self.state = State::BogusDoctype;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::DoctypePublicIdentifierDoubleQuoted => match self.read_char() {
+ State::DoctypePublicIdentifierDoubleQuoted => match self.read_char()? {
Some('"') => {
self.state = State::AfterDoctypePublicIdentifier;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.push_doctype_public_identifier("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.emitter
@@ -1557,28 +1563,28 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.state = State::Data;
self.emitter.emit_current_doctype();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInDoctype);
self.emitter.set_force_quirks();
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter.push_doctype_public_identifier(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::DoctypePublicIdentifierSingleQuoted => match self.read_char() {
+ State::DoctypePublicIdentifierSingleQuoted => match self.read_char()? {
Some('\'') => {
self.state = State::AfterDoctypePublicIdentifier;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.push_doctype_public_identifier("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.emitter
@@ -1586,28 +1592,28 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.state = State::Data;
self.emitter.emit_current_doctype();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInDoctype);
self.emitter.set_force_quirks();
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter.push_doctype_public_identifier(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::AfterDoctypePublicIdentifier => match self.read_char() {
+ State::AfterDoctypePublicIdentifier => match self.read_char()? {
Some(whitespace_pat!()) => {
self.state = State::BetweenDoctypePublicAndSystemIdentifiers;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.state = State::Data;
self.emitter.emit_current_doctype();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('"') => {
self.emitter.emit_error(
@@ -1615,7 +1621,7 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
);
self.emitter.set_doctype_system_identifier("");
self.state = State::DoctypeSystemIdentifierDoubleQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\'') => {
self.emitter.emit_error(
@@ -1623,13 +1629,13 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
);
self.emitter.set_doctype_system_identifier("");
self.state = State::DoctypeSystemIdentifierSingleQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInDoctype);
self.emitter.set_force_quirks();
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
c @ Some(_) => {
self.emitter
@@ -1637,31 +1643,31 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.unread_char(c);
self.state = State::BogusDoctype;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::BetweenDoctypePublicAndSystemIdentifiers => match self.read_char() {
- Some(whitespace_pat!()) => ControlToken::Continue,
+ State::BetweenDoctypePublicAndSystemIdentifiers => match self.read_char()? {
+ Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('>') => {
self.state = State::Data;
self.emitter.emit_current_doctype();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('"') => {
self.emitter.set_doctype_system_identifier("");
self.state = State::DoctypeSystemIdentifierDoubleQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\'') => {
self.emitter.set_doctype_system_identifier("");
self.state = State::DoctypeSystemIdentifierSingleQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInDoctype);
self.emitter.set_force_quirks();
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
c @ Some(_) => {
self.emitter
@@ -1669,27 +1675,27 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.state = State::BogusDoctype;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::AfterDoctypeSystemKeyword => match self.read_char() {
+ State::AfterDoctypeSystemKeyword => match self.read_char()? {
Some(whitespace_pat!()) => {
self.state = State::BeforeDoctypeSystemIdentifier;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('"') => {
self.emitter
.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);
self.emitter.set_doctype_system_identifier("");
self.state = State::DoctypeSystemIdentifierDoubleQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\'') => {
self.emitter
.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);
self.emitter.set_doctype_system_identifier("");
self.state = State::DoctypeSystemIdentifierSingleQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.emitter
@@ -1697,13 +1703,13 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.state = State::Data;
self.emitter.emit_current_doctype();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInDoctype);
self.emitter.set_force_quirks();
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
c @ Some(_) => {
self.emitter
@@ -1711,20 +1717,20 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.state = State::BogusDoctype;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::BeforeDoctypeSystemIdentifier => match self.read_char() {
- Some(whitespace_pat!()) => ControlToken::Continue,
+ State::BeforeDoctypeSystemIdentifier => match self.read_char()? {
+ Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('"') => {
self.emitter.set_doctype_system_identifier("");
self.state = State::DoctypeSystemIdentifierDoubleQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\'') => {
self.emitter.set_doctype_system_identifier("");
self.state = State::DoctypeSystemIdentifierSingleQuoted;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.emitter
@@ -1732,13 +1738,13 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.state = State::Data;
self.emitter.emit_current_doctype();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInDoctype);
self.emitter.set_force_quirks();
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
c @ Some(_) => {
self.emitter
@@ -1746,18 +1752,18 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.state = State::BogusDoctype;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::DoctypeSystemIdentifierDoubleQuoted => match self.read_char() {
+ State::DoctypeSystemIdentifierDoubleQuoted => match self.read_char()? {
Some('"') => {
self.state = State::AfterDoctypeSystemIdentifier;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.push_doctype_system_identifier("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.emitter
@@ -1765,28 +1771,28 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.state = State::Data;
self.emitter.emit_current_doctype();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInDoctype);
self.emitter.set_force_quirks();
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter.push_doctype_system_identifier(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::DoctypeSystemIdentifierSingleQuoted => match self.read_char() {
+ State::DoctypeSystemIdentifierSingleQuoted => match self.read_char()? {
Some('\'') => {
self.state = State::AfterDoctypeSystemIdentifier;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
self.emitter.push_doctype_system_identifier("\u{fffd}");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.emitter
@@ -1794,144 +1800,144 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.set_force_quirks();
self.state = State::Data;
self.emitter.emit_current_doctype();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInDoctype);
self.emitter.set_force_quirks();
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter.push_doctype_system_identifier(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::AfterDoctypeSystemIdentifier => match self.read_char() {
- Some(whitespace_pat!()) => ControlToken::Continue,
+ State::AfterDoctypeSystemIdentifier => match self.read_char()? {
+ Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('>') => {
self.state = State::Data;
self.emitter.emit_current_doctype();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInDoctype);
self.emitter.set_force_quirks();
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
c @ Some(_) => {
self.emitter
.emit_error(Error::UnexpectedCharacterAfterDoctypeSystemIdentifier);
self.unread_char(c);
self.state = State::BogusDoctype;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::BogusDoctype => match self.read_char() {
+ State::BogusDoctype => match self.read_char()? {
Some('>') => {
self.state = State::Data;
self.emitter.emit_current_doctype();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('\0') => {
self.emitter.emit_error(Error::UnexpectedNullCharacter);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_current_doctype();
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
- Some(_) => ControlToken::Continue,
+ Some(_) => Ok(ControlToken::Continue),
},
- State::CdataSection => match self.read_char() {
+ State::CdataSection => match self.read_char()? {
Some(']') => {
self.state = State::CdataSectionBracket;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
None => {
self.emitter.emit_error(Error::EofInCdata);
- ControlToken::Eof
+ Ok(ControlToken::Eof)
}
Some(x) => {
self.emitter.emit_string(ctostr!(x));
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::CdataSectionBracket => match self.read_char() {
+ State::CdataSectionBracket => match self.read_char()? {
Some(']') => {
self.state = State::CdataSectionEnd;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_string("]");
self.state = State::CdataSection;
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::CdataSectionEnd => match self.read_char() {
+ State::CdataSectionEnd => match self.read_char()? {
Some(']') => {
self.emitter.emit_string("]");
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('>') => {
self.state = State::Data;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter.emit_string("]]");
self.unread_char(c);
self.state = State::CdataSection;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
State::CharacterReference => {
self.temporary_buffer.clear();
self.temporary_buffer.push('&');
- match self.read_char() {
+ match self.read_char()? {
Some(x) if x.is_ascii_alphanumeric() => {
self.unread_char(Some(x));
self.state = State::NamedCharacterReference;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some('#') => {
self.temporary_buffer.push('#');
self.state = State::NumericCharacterReference;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.flush_code_points_consumed_as_character_reference();
self.state = self.return_state.take().unwrap();
self.unread_char(c);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
}
}
State::NamedCharacterReference => {
- let c = self.read_char();
+ let c = self.read_char()?;
+
+ let char_ref = match c {
+ Some(x) => entities::try_read_character_reference(x, |x| {
+ self.try_read_string(x, true)
+ })?
+ .map(|char_ref| (x, char_ref)),
- let char_ref = c.and_then(|x| {
- Some((
- x,
- entities::try_read_character_reference(x, |x| {
- self.try_read_string(x, true)
- })?,
- ))
- });
+ None => None,
+ };
if let Some((x, char_ref)) = char_ref {
self.temporary_buffer.push(x);
self.temporary_buffer.push_str(char_ref.name);
let char_ref_name_last_character = char_ref.name.chars().last();
- let next_character = self.next_input_character();
+ let next_character = self.next_input_character()?;
if self.is_consumed_as_part_of_an_attribute()
&& char_ref_name_last_character != Some(';')
&& matches!(next_character, Some(x) if x == '=' || x.is_ascii_alphanumeric())
{
self.flush_code_points_consumed_as_character_reference();
self.state = self.return_state.take().unwrap();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
} else {
if char_ref_name_last_character != Some(';') {
self.emitter
@@ -1942,16 +1948,16 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.temporary_buffer.push_str(char_ref.characters);
self.flush_code_points_consumed_as_character_reference();
self.state = self.return_state.take().unwrap();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
} else {
self.unread_char(c);
self.flush_code_points_consumed_as_character_reference();
self.state = State::AmbiguousAmpersand;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
}
- State::AmbiguousAmpersand => match self.read_char() {
+ State::AmbiguousAmpersand => match self.read_char()? {
Some(x) if x.is_ascii_alphanumeric() => {
if self.is_consumed_as_part_of_an_attribute() {
self.emitter.push_attribute_value(ctostr!(x));
@@ -1959,41 +1965,41 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.emitter.emit_string(ctostr!(x));
}
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c @ Some(';') => {
self.emitter
.emit_error(Error::UnknownNamedCharacterReference);
self.unread_char(c);
self.state = self.return_state.take().unwrap();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.unread_char(c);
self.state = self.return_state.take().unwrap();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
State::NumericCharacterReference => {
self.character_reference_code = 0;
- match self.read_char() {
+ match self.read_char()? {
Some(x @ 'x' | x @ 'X') => {
self.temporary_buffer.push(x);
self.state = State::HexadecimalCharacterReferenceStart;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.unread_char(c);
self.state = State::DecimalCharacterReferenceStart;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
}
}
- State::HexadecimalCharacterReferenceStart => match self.read_char() {
+ State::HexadecimalCharacterReferenceStart => match self.read_char()? {
c @ Some('0'..='9' | 'A'..='F' | 'a'..='f') => {
self.unread_char(c);
self.state = State::HexadecimalCharacterReference;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter
@@ -2001,14 +2007,14 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.flush_code_points_consumed_as_character_reference();
self.unread_char(c);
self.state = self.return_state.take().unwrap();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::DecimalCharacterReferenceStart => match self.read_char() {
+ State::DecimalCharacterReferenceStart => match self.read_char()? {
Some(x @ ascii_digit_pat!()) => {
self.unread_char(Some(x));
self.state = State::DecimalCharacterReference;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter
@@ -2016,49 +2022,49 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
self.flush_code_points_consumed_as_character_reference();
self.unread_char(c);
self.state = self.return_state.take().unwrap();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::HexadecimalCharacterReference => match self.read_char() {
+ State::HexadecimalCharacterReference => match self.read_char()? {
Some(x @ ascii_digit_pat!()) => {
mutate_character_reference!(*16 + x - 0x0030);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x @ 'A'..='F') => {
mutate_character_reference!(*16 + x - 0x0037);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(x @ 'a'..='f') => {
mutate_character_reference!(*16 + x - 0x0057);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(';') => {
self.state = State::NumericCharacterReferenceEnd;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter
.emit_error(Error::MissingSemicolonAfterCharacterReference);
self.unread_char(c);
self.state = State::NumericCharacterReferenceEnd;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
- State::DecimalCharacterReference => match self.read_char() {
+ State::DecimalCharacterReference => match self.read_char()? {
Some(x @ ascii_digit_pat!()) => {
mutate_character_reference!(*10 + x - 0x0030);
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
Some(';') => {
self.state = State::NumericCharacterReferenceEnd;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
c => {
self.emitter
.emit_error(Error::MissingSemicolonAfterCharacterReference);
self.unread_char(c);
self.state = State::NumericCharacterReferenceEnd;
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
},
State::NumericCharacterReferenceEnd => {
@@ -2125,26 +2131,27 @@ impl<R: Reader, E: Emitter> Tokenizer<R, E> {
.push(std::char::from_u32(self.character_reference_code).unwrap());
self.flush_code_points_consumed_as_character_reference();
self.state = self.return_state.take().unwrap();
- ControlToken::Continue
+ Ok(ControlToken::Continue)
}
}
}
}
impl<R: Reader, E: Emitter> Iterator for Tokenizer<R, E> {
- type Item = E::Token;
+ type Item = Result<E::Token, R::Error>;
fn next(&mut self) -> Option<Self::Item> {
loop {
if let Some(token) = self.emitter.pop_token() {
- break Some(token);
+ break Some(Ok(token));
} else if !self.eof {
match self.consume() {
- ControlToken::Continue => (),
- ControlToken::Eof => {
+ Ok(ControlToken::Continue) => (),
+ Ok(ControlToken::Eof) => {
self.eof = true;
self.emitter.emit_eof();
}
+ Err(e) => break Some(Err(e)),
}
} else {
break None;
@@ -2152,3 +2159,29 @@ impl<R: Reader, E: Emitter> Iterator for Tokenizer<R, E> {
}
}
}
+
+/// A kind of tokenizer that directly yields tokens when used as an iterator, so `Token` instead of
+/// `Result<Token, _>`.
+///
+/// This is the return value of [`Tokenizer::infallible`].
+pub struct InfallibleTokenizer<R: Reader<Error = Never>, E: Emitter>(Tokenizer<R, E>);
+
+impl<R: Reader<Error = Never>, E: Emitter> Tokenizer<R, E> {
+ /// Statically assert that this iterator is infallible.
+ ///
+ /// Call this to get rid of error handling when parsing HTML from strings.
+ pub fn infallible(self) -> InfallibleTokenizer<R, E> {
+ InfallibleTokenizer(self)
+ }
+}
+
+impl<R: Reader<Error = Never>, E: Emitter> Iterator for InfallibleTokenizer<R, E> {
+ type Item = E::Token;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ match self.0.next()? {
+ Ok(token) => Some(token),
+ Err(e) => match e {},
+ }
+ }
+}