aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/emitter.rs26
-rw-r--r--src/machine.rs266
-rw-r--r--src/spans.rs34
-rw-r--r--src/tokenizer.rs13
4 files changed, 162 insertions, 177 deletions
diff --git a/src/emitter.rs b/src/emitter.rs
index 48ac391..b47dc20 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -45,7 +45,7 @@ pub trait Emitter<R> {
fn emit_eof(&mut self);
/// A (probably recoverable) parsing error has occured.
- fn emit_error(&mut self, error: Error);
+ fn emit_error(&mut self, error: Error, reader: &R);
/// After every state change, the tokenizer calls this method to retrieve a new token that can
/// be returned via the tokenizer's iterator interface.
@@ -89,7 +89,7 @@ pub trait Emitter<R> {
///
/// If the current token is an end tag, the emitter should emit the
/// [`crate::Error::EndTagWithTrailingSolidus`] error.
- fn set_self_closing(&mut self);
+ fn set_self_closing(&mut self, reader: &R);
/// Assuming the _current token_ is a doctype, set its "force quirks" flag to true.
///
@@ -250,6 +250,13 @@ impl<R> DefaultEmitter<R, ()> {
let s = mem::take(&mut self.current_characters);
self.emit_token(Token::String(s));
}
+
+ fn emit_error(&mut self, error: Error) {
+ // bypass character flushing in self.emit_token: we don't need the error location to be
+ // that exact
+ self.emitted_tokens
+ .push_front(Token::Error { error, span: () });
+ }
}
impl<R> Emitter<R> for DefaultEmitter<R, ()> {
@@ -265,10 +272,8 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {
self.flush_current_characters();
}
- fn emit_error(&mut self, error: Error) {
- // bypass character flushing in self.emit_token: we don't need the error location to be
- // that exact
- self.emitted_tokens.push_front(Token::Error(error));
+ fn emit_error(&mut self, error: Error, _reader: &R) {
+ self.emit_error(error);
}
fn pop_token(&mut self) -> Option<Self::Token> {
@@ -319,7 +324,7 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {
self.emit_token(doctype);
}
- fn set_self_closing(&mut self) {
+ fn set_self_closing(&mut self, _reader: &R) {
let tag = self.current_token.as_mut().unwrap();
match tag {
Token::StartTag(StartTag {
@@ -540,5 +545,10 @@ pub enum Token<S> {
///
/// Can be skipped over, the tokenizer is supposed to recover from the error and continues with
/// more tokens afterward.
- Error(Error),
+ Error {
+ /// What kind of error occurred.
+ error: Error,
+ /// The source code span of the error.
+ span: S,
+ },
}
diff --git a/src/machine.rs b/src/machine.rs
index 4300f45..8c062ec 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -39,7 +39,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.emit_string("\0");
Ok(ControlToken::Continue)
}
@@ -60,7 +60,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.emit_string("\u{fffd}");
Ok(ControlToken::Continue)
}
@@ -76,7 +76,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.emit_string("\u{fffd}");
Ok(ControlToken::Continue)
}
@@ -92,7 +92,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.emit_string("\u{fffd}");
Ok(ControlToken::Continue)
}
@@ -104,7 +104,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
},
State::PlainText => match slf.read_char()? {
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.emit_string("\u{fffd}");
Ok(ControlToken::Continue)
}
@@ -130,21 +130,19 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
c @ Some('?') => {
- slf.emitter
- .emit_error(Error::UnexpectedQuestionMarkInsteadOfTagName);
+ slf.emit_error(Error::UnexpectedQuestionMarkInsteadOfTagName);
slf.emitter.init_comment(&slf.reader);
slf.state = State::BogusComment;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofBeforeTagName);
+ slf.emit_error(Error::EofBeforeTagName);
slf.emitter.emit_string("<");
Ok(ControlToken::Eof)
}
c @ Some(_) => {
- slf.emitter
- .emit_error(Error::InvalidFirstCharacterOfTagName);
+ slf.emit_error(Error::InvalidFirstCharacterOfTagName);
slf.state = State::Data;
slf.emitter.emit_string("<");
slf.unread_char(c);
@@ -159,18 +157,17 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('>') => {
- slf.emitter.emit_error(Error::MissingEndTagName);
+ slf.emit_error(Error::MissingEndTagName);
slf.state = State::Data;
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofBeforeTagName);
+ slf.emit_error(Error::EofBeforeTagName);
slf.emitter.emit_string("</");
Ok(ControlToken::Eof)
}
Some(x) => {
- slf.emitter
- .emit_error(Error::InvalidFirstCharacterOfTagName);
+ slf.emit_error(Error::InvalidFirstCharacterOfTagName);
slf.emitter.init_comment(&slf.reader);
slf.state = State::BogusComment;
slf.unread_char(Some(x));
@@ -192,7 +189,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_tag_name("\u{fffd}");
Ok(ControlToken::Continue)
}
@@ -201,7 +198,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInTag);
+ slf.emit_error(Error::EofInTag);
Ok(ControlToken::Eof)
}
},
@@ -409,13 +406,12 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.emit_string("\u{fffd}");
Ok(ControlToken::Continue)
}
None => {
- slf.emitter
- .emit_error(Error::EofInScriptHtmlCommentLikeText);
+ slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
Ok(ControlToken::Eof)
}
Some(x) => {
@@ -434,14 +430,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.state = State::ScriptDataEscaped;
slf.emitter.emit_string("\u{fffd}");
Ok(ControlToken::Continue)
}
None => {
- slf.emitter
- .emit_error(Error::EofInScriptHtmlCommentLikeText);
+ slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
Ok(ControlToken::Eof)
}
Some(x) => {
@@ -465,14 +460,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.state = State::ScriptDataEscaped;
slf.emitter.emit_string("\u{fffd}");
Ok(ControlToken::Continue)
}
None => {
- slf.emitter
- .emit_error(Error::EofInScriptHtmlCommentLikeText);
+ slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
Ok(ControlToken::Eof)
}
Some(x) => {
@@ -575,13 +569,12 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.emit_string("\u{fffd}");
Ok(ControlToken::Continue)
}
None => {
- slf.emitter
- .emit_error(Error::EofInScriptHtmlCommentLikeText);
+ slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
Ok(ControlToken::Eof)
}
Some(x) => {
@@ -601,14 +594,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.state = State::ScriptDataDoubleEscaped;
slf.emitter.emit_string("\u{fffd}");
Ok(ControlToken::Continue)
}
None => {
- slf.emitter
- .emit_error(Error::EofInScriptHtmlCommentLikeText);
+ slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
Ok(ControlToken::Eof)
}
Some(x) => {
@@ -633,14 +625,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.state = State::ScriptDataDoubleEscaped;
slf.emitter.emit_string("\u{fffd}");
Ok(ControlToken::Continue)
}
None => {
- slf.emitter
- .emit_error(Error::EofInScriptHtmlCommentLikeText);
+ slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
Ok(ControlToken::Eof)
}
Some(x) => {
@@ -692,8 +683,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('=') => {
- slf.emitter
- .emit_error(Error::UnexpectedEqualsSignBeforeAttributeName);
+ slf.emit_error(Error::UnexpectedEqualsSignBeforeAttributeName);
slf.emitter.init_attribute_name(&slf.reader);
slf.emitter.push_attribute_name("=");
slf.state = State::AttributeName;
@@ -717,13 +707,12 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_attribute_name("\u{fffd}");
Ok(ControlToken::Continue)
}
Some(x @ '"' | x @ '\'' | x @ '<') => {
- slf.emitter
- .emit_error(Error::UnexpectedCharacterInAttributeName);
+ slf.emit_error(Error::UnexpectedCharacterInAttributeName);
slf.emitter
.push_attribute_name(ctostr!(x.to_ascii_lowercase()));
Ok(ControlToken::Continue)
@@ -750,7 +739,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInTag);
+ slf.emit_error(Error::EofInTag);
Ok(ControlToken::Eof)
}
Some(x) => {
@@ -773,7 +762,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('>') => {
- slf.emitter.emit_error(Error::MissingAttributeValue);
+ slf.emit_error(Error::MissingAttributeValue);
slf.state = State::Data;
slf.emitter.emit_current_tag();
Ok(ControlToken::Continue)
@@ -796,12 +785,12 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_attribute_value("\u{fffd}");
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInTag);
+ slf.emit_error(Error::EofInTag);
Ok(ControlToken::Eof)
}
Some(x) => {
@@ -820,12 +809,12 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_attribute_value("\u{fffd}");
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInTag);
+ slf.emit_error(Error::EofInTag);
Ok(ControlToken::Eof)
}
Some(x) => {
@@ -849,18 +838,17 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_attribute_value("\u{fffd}");
Ok(ControlToken::Continue)
}
Some(x @ '"' | x @ '\'' | x @ '<' | x @ '=' | x @ '\u{60}') => {
- slf.emitter
- .emit_error(Error::UnexpectedCharacterInUnquotedAttributeValue);
+ slf.emit_error(Error::UnexpectedCharacterInUnquotedAttributeValue);
slf.emitter.push_attribute_value(ctostr!(x));
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInTag);
+ slf.emit_error(Error::EofInTag);
Ok(ControlToken::Eof)
}
Some(x) => {
@@ -883,12 +871,11 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInTag);
+ slf.emit_error(Error::EofInTag);
Ok(ControlToken::Eof)
}
Some(x) => {
- slf.emitter
- .emit_error(Error::MissingWhitespaceBetweenAttributes);
+ slf.emit_error(Error::MissingWhitespaceBetweenAttributes);
slf.state = State::BeforeAttributeName;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
@@ -896,17 +883,17 @@ pub fn consume<R: Reader, E: Emitter<R>>(
},
State::SelfClosingStartTag => match slf.read_char()? {
Some('>') => {
- slf.emitter.set_self_closing();
+ slf.emitter.set_self_closing(&slf.reader);
slf.state = State::Data;
slf.emitter.emit_current_tag();
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInTag);
+ slf.emit_error(Error::EofInTag);
Ok(ControlToken::Eof)
}
Some(x) => {
- slf.emitter.emit_error(Error::UnexpectedSolidusInTag);
+ slf.emit_error(Error::UnexpectedSolidusInTag);
slf.state = State::BeforeAttributeName;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
@@ -923,7 +910,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Eof)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_comment("\u{fffd}");
Ok(ControlToken::Continue)
}
@@ -949,7 +936,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
// missing: cdata transition
//
// let's hope that bogus comment can just sort of skip over cdata
- slf.emitter.emit_error(Error::CdataInHtmlContent);
+ slf.emit_error(Error::CdataInHtmlContent);
slf.emitter.init_comment(&slf.reader);
slf.emitter.push_comment("[CDATA[");
@@ -957,7 +944,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
c => {
- slf.emitter.emit_error(Error::IncorrectlyOpenedComment);
+ slf.emit_error(Error::IncorrectlyOpenedComment);
slf.emitter.init_comment(&slf.reader);
slf.state = State::BogusComment;
slf.unread_char(c);
@@ -970,7 +957,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('>') => {
- slf.emitter.emit_error(Error::AbruptClosingOfEmptyComment);
+ slf.emit_error(Error::AbruptClosingOfEmptyComment);
slf.state = State::Data;
slf.emitter.emit_current_comment();
Ok(ControlToken::Continue)
@@ -987,13 +974,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('>') => {
- slf.emitter.emit_error(Error::AbruptClosingOfEmptyComment);
+ slf.emit_error(Error::AbruptClosingOfEmptyComment);
slf.state = State::Data;
slf.emitter.emit_current_comment();
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInComment);
+ slf.emit_error(Error::EofInComment);
slf.emitter.emit_current_comment();
Ok(ControlToken::Eof)
}
@@ -1015,12 +1002,12 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_comment("\u{fffd}");
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInComment);
+ slf.emit_error(Error::EofInComment);
slf.emitter.emit_current_comment();
Ok(ControlToken::Eof)
}
@@ -1074,7 +1061,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
c => {
- slf.emitter.emit_error(Error::NestedComment);
+ slf.emit_error(Error::NestedComment);
slf.unread_char(c);
slf.state = State::CommentEnd;
Ok(ControlToken::Continue)
@@ -1086,7 +1073,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInComment);
+ slf.emit_error(Error::EofInComment);
slf.emitter.emit_current_comment();
Ok(ControlToken::Eof)
}
@@ -1112,7 +1099,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInComment);
+ slf.emit_error(Error::EofInComment);
slf.emitter.emit_current_comment();
Ok(ControlToken::Eof)
}
@@ -1133,13 +1120,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('>') => {
- slf.emitter.emit_error(Error::IncorrectlyClosedComment);
+ slf.emit_error(Error::IncorrectlyClosedComment);
slf.state = State::Data;
slf.emitter.emit_current_comment();
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInComment);
+ slf.emit_error(Error::EofInComment);
slf.emitter.emit_current_comment();
Ok(ControlToken::Eof)
}
@@ -1163,15 +1150,14 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInDoctype);
+ slf.emit_error(Error::EofInDoctype);
slf.emitter.init_doctype(&slf.reader);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
}
c @ Some(_) => {
- slf.emitter
- .emit_error(Error::MissingWhitespaceBeforeDoctypeName);
+ slf.emit_error(Error::MissingWhitespaceBeforeDoctypeName);
slf.unread_char(c);
slf.state = State::BeforeDoctypeName;
Ok(ControlToken::Continue)
@@ -1180,14 +1166,14 @@ pub fn consume<R: Reader, E: Emitter<R>>(
State::BeforeDoctypeName => match slf.read_char()? {
Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.init_doctype(&slf.reader);
slf.emitter.push_doctype_name("\u{fffd}");
slf.state = State::DoctypeName;
Ok(ControlToken::Continue)
}
Some('>') => {
- slf.emitter.emit_error(Error::MissingDoctypeName);
+ slf.emit_error(Error::MissingDoctypeName);
slf.emitter.init_doctype(&slf.reader);
slf.emitter.set_force_quirks();
slf.state = State::Data;
@@ -1195,7 +1181,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInDoctype);
+ slf.emit_error(Error::EofInDoctype);
slf.emitter.init_doctype(&slf.reader);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
@@ -1220,12 +1206,12 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_doctype_name("\u{fffd}");
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInDoctype);
+ slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
@@ -1244,7 +1230,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInDoctype);
+ slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
@@ -1258,8 +1244,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
c @ Some(_) => {
- slf.emitter
- .emit_error(Error::InvalidCharacterSequenceAfterDoctypeName);
+ slf.emit_error(Error::InvalidCharacterSequenceAfterDoctypeName);
slf.emitter.set_force_quirks();
slf.unread_char(c);
slf.state = State::BogusDoctype;
@@ -1272,36 +1257,32 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('"') => {
- slf.emitter
- .emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);
+ slf.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);
slf.emitter.set_doctype_public_identifier("");
slf.state = State::DoctypePublicIdentifierDoubleQuoted;
Ok(ControlToken::Continue)
}
Some('\'') => {
- slf.emitter
- .emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);
+ slf.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);
slf.emitter.set_doctype_public_identifier("");
slf.state = State::DoctypePublicIdentifierSingleQuoted;
Ok(ControlToken::Continue)
}
Some('>') => {
- slf.emitter
- .emit_error(Error::MissingDoctypePublicIdentifier);
+ slf.emit_error(Error::MissingDoctypePublicIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype();
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInDoctype);
+ slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
}
c @ Some(_) => {
- slf.emitter
- .emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier);
+ slf.emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier);
slf.emitter.set_force_quirks();
slf.unread_char(c);
slf.state = State::BogusDoctype;
@@ -1321,22 +1302,20 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('>') => {
- slf.emitter
- .emit_error(Error::MissingDoctypePublicIdentifier);
+ slf.emit_error(Error::MissingDoctypePublicIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype();
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInDoctype);
+ slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
}
c @ Some(_) => {
- slf.emitter
- .emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier);
+ slf.emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier);
slf.emitter.set_force_quirks();
slf.unread_char(c);
slf.state = State::BogusDoctype;
@@ -1349,19 +1328,19 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_doctype_public_identifier("\u{fffd}");
Ok(ControlToken::Continue)
}
Some('>') => {
- slf.emitter.emit_error(Error::AbruptDoctypePublicIdentifier);
+ slf.emit_error(Error::AbruptDoctypePublicIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype();
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInDoctype);
+ slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
@@ -1377,19 +1356,19 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_doctype_public_identifier("\u{fffd}");
Ok(ControlToken::Continue)
}
Some('>') => {
- slf.emitter.emit_error(Error::AbruptDoctypePublicIdentifier);
+ slf.emit_error(Error::AbruptDoctypePublicIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype();
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInDoctype);
+ slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
@@ -1410,28 +1389,25 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('"') => {
- slf.emitter
- .emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
+ slf.emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
slf.emitter.set_doctype_system_identifier("");
slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
Ok(ControlToken::Continue)
}
Some('\'') => {
- slf.emitter
- .emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
+ slf.emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
slf.emitter.set_doctype_system_identifier("");
slf.state = State::DoctypeSystemIdentifierSingleQuoted;
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInDoctype);
+ slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
}
c @ Some(_) => {
- slf.emitter
- .emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
+ slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.unread_char(c);
slf.state = State::BogusDoctype;
@@ -1456,14 +1432,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInDoctype);
+ slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
}
c @ Some(_) => {
- slf.emitter
- .emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
+ slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::BogusDoctype;
slf.unread_char(c);
@@ -1476,36 +1451,32 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('"') => {
- slf.emitter
- .emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);
+ slf.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);
slf.emitter.set_doctype_system_identifier("");
slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
Ok(ControlToken::Continue)
}
Some('\'') => {
- slf.emitter
- .emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);
+ slf.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);
slf.emitter.set_doctype_system_identifier("");
slf.state = State::DoctypeSystemIdentifierSingleQuoted;
Ok(ControlToken::Continue)
}
Some('>') => {
- slf.emitter
- .emit_error(Error::MissingDoctypeSystemIdentifier);
+ slf.emit_error(Error::MissingDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype();
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInDoctype);
+ slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
}
c @ Some(_) => {
- slf.emitter
- .emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
+ slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::BogusDoctype;
slf.unread_char(c);
@@ -1525,22 +1496,20 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('>') => {
- slf.emitter
- .emit_error(Error::MissingDoctypeSystemIdentifier);
+ slf.emit_error(Error::MissingDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype();
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInDoctype);
+ slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
}
c @ Some(_) => {
- slf.emitter
- .emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
+ slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::BogusDoctype;
slf.unread_char(c);
@@ -1553,19 +1522,19 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_doctype_system_identifier("\u{fffd}");
Ok(ControlToken::Continue)
}
Some('>') => {
- slf.emitter.emit_error(Error::AbruptDoctypeSystemIdentifier);
+ slf.emit_error(Error::AbruptDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype();
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInDoctype);
+ slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
@@ -1581,19 +1550,19 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_doctype_system_identifier("\u{fffd}");
Ok(ControlToken::Continue)
}
Some('>') => {
- slf.emitter.emit_error(Error::AbruptDoctypeSystemIdentifier);
+ slf.emit_error(Error::AbruptDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype();
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInDoctype);
+ slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
@@ -1611,14 +1580,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInDoctype);
+ slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
}
c @ Some(_) => {
- slf.emitter
- .emit_error(Error::UnexpectedCharacterAfterDoctypeSystemIdentifier);
+ slf.emit_error(Error::UnexpectedCharacterAfterDoctypeSystemIdentifier);
slf.unread_char(c);
slf.state = State::BogusDoctype;
Ok(ControlToken::Continue)
@@ -1631,7 +1599,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
Some('\0') => {
- slf.emitter.emit_error(Error::UnexpectedNullCharacter);
+ slf.emit_error(Error::UnexpectedNullCharacter);
Ok(ControlToken::Continue)
}
None => {
@@ -1646,7 +1614,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
None => {
- slf.emitter.emit_error(Error::EofInCdata);
+ slf.emit_error(Error::EofInCdata);
Ok(ControlToken::Eof)
}
Some(x) => {
@@ -1728,8 +1696,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
} else {
if char_ref_name_last_character != Some(';') {
- slf.emitter
- .emit_error(Error::MissingSemicolonAfterCharacterReference);
+ slf.emit_error(Error::MissingSemicolonAfterCharacterReference);
}
slf.temporary_buffer.clear();
@@ -1756,8 +1723,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
c @ Some(';') => {
- slf.emitter
- .emit_error(Error::UnknownNamedCharacterReference);
+ slf.emit_error(Error::UnknownNamedCharacterReference);
slf.unread_char(c);
slf.state = slf.return_state.take().unwrap();
Ok(ControlToken::Continue)
@@ -1790,8 +1756,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
c => {
- slf.emitter
- .emit_error(Error::AbsenceOfDigitsInNumericCharacterReference);
+ slf.emit_error(Error::AbsenceOfDigitsInNumericCharacterReference);
slf.flush_code_points_consumed_as_character_reference();
slf.unread_char(c);
slf.state = slf.return_state.take().unwrap();
@@ -1805,8 +1770,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
c => {
- slf.emitter
- .emit_error(Error::AbsenceOfDigitsInNumericCharacterReference);
+ slf.emit_error(Error::AbsenceOfDigitsInNumericCharacterReference);
slf.flush_code_points_consumed_as_character_reference();
slf.unread_char(c);
slf.state = slf.return_state.take().unwrap();
@@ -1831,8 +1795,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
c => {
- slf.emitter
- .emit_error(Error::MissingSemicolonAfterCharacterReference);
+ slf.emit_error(Error::MissingSemicolonAfterCharacterReference);
slf.unread_char(c);
slf.state = State::NumericCharacterReferenceEnd;
Ok(ControlToken::Continue)
@@ -1848,8 +1811,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(
Ok(ControlToken::Continue)
}
c => {
- slf.emitter
- .emit_error(Error::MissingSemicolonAfterCharacterReference);
+ slf.emit_error(Error::MissingSemicolonAfterCharacterReference);
slf.unread_char(c);
slf.state = State::NumericCharacterReferenceEnd;
Ok(ControlToken::Continue)
@@ -1858,28 +1820,26 @@ pub fn consume<R: Reader, E: Emitter<R>>(
State::NumericCharacterReferenceEnd => {
match slf.character_reference_code {
0x00 => {
- slf.emitter.emit_error(Error::NullCharacterReference);
+ slf.emit_error(Error::NullCharacterReference);
slf.character_reference_code = 0xfffd;
}
0x110000.. => {
- slf.emitter
- .emit_error(Error::CharacterReferenceOutsideUnicodeRange);
+ slf.emit_error(Error::CharacterReferenceOutsideUnicodeRange);
slf.character_reference_code = 0xfffd;
}
surrogate_pat!() => {
- slf.emitter.emit_error(Error::SurrogateCharacterReference);
+ slf.emit_error(Error::SurrogateCharacterReference);
slf.character_reference_code = 0xfffd;
}
// noncharacter
noncharacter_pat!() => {
- slf.emitter
- .emit_error(Error::NoncharacterCharacterReference);
+ slf.emit_error(Error::NoncharacterCharacterReference);
}
// 0x000d, or a control that is not whitespace
x @ 0x000d | x @ control_pat!()
if !matches!(x, 0x0009 | 0x000a | 0x000c | 0x0020) =>
{
- slf.emitter.emit_error(Error::ControlCharacterReference);
+ slf.emit_error(Error::ControlCharacterReference);
slf.character_reference_code = match x {
0x80 => 0x20AC, // EURO SIGN (€)
0x82 => 0x201A, // SINGLE LOW-9 QUOTATION MARK (‚)
diff --git a/src/spans.rs b/src/spans.rs
index 6d7c18e..85a64a9 100644
--- a/src/spans.rs
+++ b/src/spans.rs
@@ -62,6 +62,7 @@ pub struct SpanEmitter<R> {
seen_attributes: BTreeSet<String>,
emitted_tokens: VecDeque<Token<Span>>,
reader: PhantomData<R>,
+ attr_in_end_tag_span: Span,
}
impl<R> Default for SpanEmitter<R> {
@@ -74,6 +75,7 @@ impl<R> Default for SpanEmitter<R> {
seen_attributes: BTreeSet::new(),
emitted_tokens: VecDeque::new(),
reader: PhantomData::default(),
+ attr_in_end_tag_span: Span::default(),
}
}
}
@@ -91,18 +93,19 @@ impl<R: GetPos> SpanEmitter<R> {
let mut error = None;
tag.attributes
.entry(k)
- .and_modify(|_| {
- error = Some(Error::DuplicateAttribute);
+ .and_modify(|a| {
+ error = Some((Error::DuplicateAttribute, a.name_span.clone()));
})
.or_insert(v);
- if let Some(e) = error {
- self.emit_error(e);
+ if let Some((e, span)) = error {
+ self.emit_error_span(e, span);
}
}
Some(Token::EndTag(_)) => {
+ self.attr_in_end_tag_span = v.name_span.clone();
if !self.seen_attributes.insert(k) {
- self.emit_error(Error::DuplicateAttribute);
+ self.emit_error_span(Error::DuplicateAttribute, v.name_span);
}
}
_ => {
@@ -120,6 +123,12 @@ impl<R: GetPos> SpanEmitter<R> {
let s = mem::take(&mut self.current_characters);
self.emit_token(Token::String(s));
}
+
+ fn emit_error_span(&mut self, error: Error, span: Span) {
+ // queue the error token directly, skipping the character flushing done by
+ // self.emit_token: the error location does not need to be that exact
+ self.emitted_tokens.push_front(Token::Error { error, span });
+ }
}
impl<R: GetPos> Emitter<R> for SpanEmitter<R> {
@@ -135,10 +144,8 @@ impl<R: GetPos> Emitter<R> for SpanEmitter<R> {
self.flush_current_characters();
}
- fn emit_error(&mut self, error: Error) {
- // bypass character flushing in self.emit_token: we don't need the error location to be
- // that exact
- self.emitted_tokens.push_front(Token::Error(error));
+ fn emit_error(&mut self, error: Error, reader: &R) {
+ self.emit_error_span(error, reader.get_pos() - 1..reader.get_pos() - 1) // NOTE(review): zero-width span at get_pos() - 1; confirm get_pos() >= 1 here
}
fn pop_token(&mut self) -> Option<Self::Token> {
@@ -172,7 +179,10 @@ impl<R: GetPos> Emitter<R> for SpanEmitter<R> {
match token {
Token::EndTag(_) => {
if !self.seen_attributes.is_empty() {
- self.emit_error(Error::EndTagWithAttributes);
+ self.emit_error_span(
+ Error::EndTagWithAttributes,
+ self.attr_in_end_tag_span.clone(),
+ );
}
self.seen_attributes.clear();
}
@@ -195,7 +205,7 @@ impl<R: GetPos> Emitter<R> for SpanEmitter<R> {
self.emit_token(doctype);
}
- fn set_self_closing(&mut self) {
+ fn set_self_closing(&mut self, reader: &R) {
let tag = self.current_token.as_mut().unwrap();
match tag {
Token::StartTag(StartTag {
@@ -205,7 +215,7 @@ impl<R: GetPos> Emitter<R> for SpanEmitter<R> {
*self_closing = true;
}
Token::EndTag(_) => {
- self.emit_error(Error::EndTagWithTrailingSolidus);
+ self.emit_error(Error::EndTagWithTrailingSolidus, reader);
}
_ => {
debug_assert!(false);
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index efaa870..6e928e9 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -122,6 +122,12 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {
self.state = state.into();
}
+ /// Helper for the machine: reports `error` to the emitter, passing along the reader.
+ #[inline]
+ pub(crate) fn emit_error(&mut self, error: Error) {
+ self.emitter.emit_error(error, &self.reader);
+ }
+
/// Test-internal function to override internal state.
///
/// Only available with the `integration-tests` feature which is not public API.
@@ -139,17 +145,16 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {
fn validate_char(&mut self, c: char) {
match c as u32 {
surrogate_pat!() => {
- self.emitter.emit_error(Error::SurrogateInInputStream);
+ self.emit_error(Error::SurrogateInInputStream);
}
noncharacter_pat!() => {
- self.emitter.emit_error(Error::NoncharacterInInputStream);
+ self.emit_error(Error::NoncharacterInInputStream);
}
// control without whitespace or nul
x @ control_pat!()
if !matches!(x, 0x0000 | 0x0009 | 0x000a | 0x000c | 0x000d | 0x0020) =>
{
- self.emitter
- .emit_error(Error::ControlCharacterInInputStream);
+ self.emit_error(Error::ControlCharacterInInputStream);
}
_ => (),
}