diff options
| -rw-r--r-- | src/emitter.rs | 26 | ||||
| -rw-r--r-- | src/machine.rs | 266 | ||||
| -rw-r--r-- | src/spans.rs | 34 | ||||
| -rw-r--r-- | src/tokenizer.rs | 13 | ||||
| -rw-r--r-- | tests/span-tests/demo.html | 10 | ||||
| -rw-r--r-- | tests/span-tests/demo.out | 46 | ||||
| -rw-r--r-- | tests/test_html5lib.rs | 4 | ||||
| -rw-r--r-- | tests/test_spans.rs | 4 | 
8 files changed, 205 insertions, 198 deletions
| diff --git a/src/emitter.rs b/src/emitter.rs index 48ac391..b47dc20 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -45,7 +45,7 @@ pub trait Emitter<R> {      fn emit_eof(&mut self);      /// A (probably recoverable) parsing error has occured. -    fn emit_error(&mut self, error: Error); +    fn emit_error(&mut self, error: Error, reader: &R);      /// After every state change, the tokenizer calls this method to retrieve a new token that can      /// be returned via the tokenizer's iterator interface. @@ -89,7 +89,7 @@ pub trait Emitter<R> {      ///      /// If the current token is an end tag, the emitter should emit the      /// [`crate::Error::EndTagWithTrailingSolidus`] error. -    fn set_self_closing(&mut self); +    fn set_self_closing(&mut self, reader: &R);      /// Assuming the _current token_ is a doctype, set its "force quirks" flag to true.      /// @@ -250,6 +250,13 @@ impl<R> DefaultEmitter<R, ()> {          let s = mem::take(&mut self.current_characters);          self.emit_token(Token::String(s));      } + +    fn emit_error(&mut self, error: Error) { +        // bypass character flushing in self.emit_token: we don't need the error location to be +        // that exact +        self.emitted_tokens +            .push_front(Token::Error { error, span: () }); +    }  }  impl<R> Emitter<R> for DefaultEmitter<R, ()> { @@ -265,10 +272,8 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {          self.flush_current_characters();      } -    fn emit_error(&mut self, error: Error) { -        // bypass character flushing in self.emit_token: we don't need the error location to be -        // that exact -        self.emitted_tokens.push_front(Token::Error(error)); +    fn emit_error(&mut self, error: Error, _reader: &R) { +        self.emit_error(error);      }      fn pop_token(&mut self) -> Option<Self::Token> { @@ -319,7 +324,7 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {          self.emit_token(doctype);      } -    fn set_self_closing(&mut self) { +    fn set_self_closing(&mut self, _reader: &R) {          let tag = self.current_token.as_mut().unwrap();          match tag {              Token::StartTag(StartTag { @@ -540,5 +545,10 @@ pub enum Token<S> {      ///      /// Can be skipped over, the tokenizer is supposed to recover from the error and continues with      /// more tokens afterward. -    Error(Error), +    Error { +        /// What kind of error occured. +        error: Error, +        /// The source code span of the error. +        span: S, +    },  } diff --git a/src/machine.rs b/src/machine.rs index 4300f45..8c062ec 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -39,7 +39,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.emit_string("\0");                  Ok(ControlToken::Continue)              } @@ -60,7 +60,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.emit_string("\u{fffd}");                  Ok(ControlToken::Continue)              } @@ -76,7 +76,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.emit_string("\u{fffd}");                  Ok(ControlToken::Continue)              } @@ -92,7 +92,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.emit_string("\u{fffd}");                  Ok(ControlToken::Continue)              } @@ -104,7 +104,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(          },          State::PlainText => match slf.read_char()? {              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.emit_string("\u{fffd}");                  Ok(ControlToken::Continue)              } @@ -130,21 +130,19 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              c @ Some('?') => { -                slf.emitter -                    .emit_error(Error::UnexpectedQuestionMarkInsteadOfTagName); +                slf.emit_error(Error::UnexpectedQuestionMarkInsteadOfTagName);                  slf.emitter.init_comment(&slf.reader);                  slf.state = State::BogusComment;                  slf.unread_char(c);                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofBeforeTagName); +                slf.emit_error(Error::EofBeforeTagName);                  slf.emitter.emit_string("<");                  Ok(ControlToken::Eof)              }              c @ Some(_) => { -                slf.emitter -                    .emit_error(Error::InvalidFirstCharacterOfTagName); +                slf.emit_error(Error::InvalidFirstCharacterOfTagName);                  slf.state = State::Data;                  slf.emitter.emit_string("<");                  slf.unread_char(c); @@ -159,18 +157,17 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('>') => { -                slf.emitter.emit_error(Error::MissingEndTagName); +                slf.emit_error(Error::MissingEndTagName);                  slf.state = State::Data;                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofBeforeTagName); +                slf.emit_error(Error::EofBeforeTagName);                  slf.emitter.emit_string("</");                  Ok(ControlToken::Eof)              }              Some(x) => { -                slf.emitter -                    .emit_error(Error::InvalidFirstCharacterOfTagName); +                slf.emit_error(Error::InvalidFirstCharacterOfTagName);                  slf.emitter.init_comment(&slf.reader);                  slf.state = State::BogusComment;                  slf.unread_char(Some(x)); @@ -192,7 +189,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.push_tag_name("\u{fffd}");                  Ok(ControlToken::Continue)              } @@ -201,7 +198,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInTag); +                slf.emit_error(Error::EofInTag);                  Ok(ControlToken::Eof)              }          }, @@ -409,13 +406,12 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.emit_string("\u{fffd}");                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter -                    .emit_error(Error::EofInScriptHtmlCommentLikeText); +                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);                  Ok(ControlToken::Eof)              }              Some(x) => { @@ -434,14 +430,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.state = State::ScriptDataEscaped;                  slf.emitter.emit_string("\u{fffd}");                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter -                    .emit_error(Error::EofInScriptHtmlCommentLikeText); +                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);                  Ok(ControlToken::Eof)              }              Some(x) => { @@ -465,14 +460,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.state = State::ScriptDataEscaped;                  slf.emitter.emit_string("\u{fffd}");                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter -                    .emit_error(Error::EofInScriptHtmlCommentLikeText); +                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);                  Ok(ControlToken::Eof)              }              Some(x) => { @@ -575,13 +569,12 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.emit_string("\u{fffd}");                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter -                    .emit_error(Error::EofInScriptHtmlCommentLikeText); +                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);                  Ok(ControlToken::Eof)              }              Some(x) => { @@ -601,14 +594,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.state = State::ScriptDataDoubleEscaped;                  slf.emitter.emit_string("\u{fffd}");                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter -                    .emit_error(Error::EofInScriptHtmlCommentLikeText); +                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);                  Ok(ControlToken::Eof)              }              Some(x) => { @@ -633,14 +625,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.state = State::ScriptDataDoubleEscaped;                  slf.emitter.emit_string("\u{fffd}");                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter -                    .emit_error(Error::EofInScriptHtmlCommentLikeText); +                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);                  Ok(ControlToken::Eof)              }              Some(x) => { @@ -692,8 +683,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('=') => { -                slf.emitter -                    .emit_error(Error::UnexpectedEqualsSignBeforeAttributeName); +                slf.emit_error(Error::UnexpectedEqualsSignBeforeAttributeName);                  slf.emitter.init_attribute_name(&slf.reader);                  slf.emitter.push_attribute_name("=");                  slf.state = State::AttributeName; @@ -717,13 +707,12 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.push_attribute_name("\u{fffd}");                  Ok(ControlToken::Continue)              }              Some(x @ '"' | x @ '\'' | x @ '<') => { -                slf.emitter -                    .emit_error(Error::UnexpectedCharacterInAttributeName); +                slf.emit_error(Error::UnexpectedCharacterInAttributeName);                  slf.emitter                      .push_attribute_name(ctostr!(x.to_ascii_lowercase()));                  Ok(ControlToken::Continue) @@ -750,7 +739,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInTag); +                slf.emit_error(Error::EofInTag);                  Ok(ControlToken::Eof)              }              Some(x) => { @@ -773,7 +762,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('>') => { -                slf.emitter.emit_error(Error::MissingAttributeValue); +                slf.emit_error(Error::MissingAttributeValue);                  slf.state = State::Data;                  slf.emitter.emit_current_tag();                  Ok(ControlToken::Continue) @@ -796,12 +785,12 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.push_attribute_value("\u{fffd}");                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInTag); +                slf.emit_error(Error::EofInTag);                  Ok(ControlToken::Eof)              }              Some(x) => { @@ -820,12 +809,12 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.push_attribute_value("\u{fffd}");                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInTag); +                slf.emit_error(Error::EofInTag);                  Ok(ControlToken::Eof)              }              Some(x) => { @@ -849,18 +838,17 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.push_attribute_value("\u{fffd}");                  Ok(ControlToken::Continue)              }              Some(x @ '"' | x @ '\'' | x @ '<' | x @ '=' | x @ '\u{60}') => { -                slf.emitter -                    .emit_error(Error::UnexpectedCharacterInUnquotedAttributeValue); +                slf.emit_error(Error::UnexpectedCharacterInUnquotedAttributeValue);                  slf.emitter.push_attribute_value(ctostr!(x));                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInTag); +                slf.emit_error(Error::EofInTag);                  Ok(ControlToken::Eof)              }              Some(x) => { @@ -883,12 +871,11 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInTag); +                slf.emit_error(Error::EofInTag);                  Ok(ControlToken::Eof)              }              Some(x) => { -                slf.emitter -                    .emit_error(Error::MissingWhitespaceBetweenAttributes); +                slf.emit_error(Error::MissingWhitespaceBetweenAttributes);                  slf.state = State::BeforeAttributeName;                  slf.unread_char(Some(x));                  Ok(ControlToken::Continue) @@ -896,17 +883,17 @@ pub fn consume<R: Reader, E: Emitter<R>>(          },          State::SelfClosingStartTag => match slf.read_char()? {              Some('>') => { -                slf.emitter.set_self_closing(); +                slf.emitter.set_self_closing(&slf.reader);                  slf.state = State::Data;                  slf.emitter.emit_current_tag();                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInTag); +                slf.emit_error(Error::EofInTag);                  Ok(ControlToken::Eof)              }              Some(x) => { -                slf.emitter.emit_error(Error::UnexpectedSolidusInTag); +                slf.emit_error(Error::UnexpectedSolidusInTag);                  slf.state = State::BeforeAttributeName;                  slf.unread_char(Some(x));                  Ok(ControlToken::Continue) @@ -923,7 +910,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Eof)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.push_comment("\u{fffd}");                  Ok(ControlToken::Continue)              } @@ -949,7 +936,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  // missing: cdata transition                  //                  // let's hope that bogus comment can just sort of skip over cdata -                slf.emitter.emit_error(Error::CdataInHtmlContent); +                slf.emit_error(Error::CdataInHtmlContent);                  slf.emitter.init_comment(&slf.reader);                  slf.emitter.push_comment("[CDATA["); @@ -957,7 +944,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_error(Error::IncorrectlyOpenedComment); +                slf.emit_error(Error::IncorrectlyOpenedComment);                  slf.emitter.init_comment(&slf.reader);                  slf.state = State::BogusComment;                  slf.unread_char(c); @@ -970,7 +957,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('>') => { -                slf.emitter.emit_error(Error::AbruptClosingOfEmptyComment); +                slf.emit_error(Error::AbruptClosingOfEmptyComment);                  slf.state = State::Data;                  slf.emitter.emit_current_comment();                  Ok(ControlToken::Continue) @@ -987,13 +974,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('>') => { -                slf.emitter.emit_error(Error::AbruptClosingOfEmptyComment); +                slf.emit_error(Error::AbruptClosingOfEmptyComment);                  slf.state = State::Data;                  slf.emitter.emit_current_comment();                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInComment); +                slf.emit_error(Error::EofInComment);                  slf.emitter.emit_current_comment();                  Ok(ControlToken::Eof)              } @@ -1015,12 +1002,12 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.push_comment("\u{fffd}");                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInComment); +                slf.emit_error(Error::EofInComment);                  slf.emitter.emit_current_comment();                  Ok(ControlToken::Eof)              } @@ -1074,7 +1061,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter.emit_error(Error::NestedComment); +                slf.emit_error(Error::NestedComment);                  slf.unread_char(c);                  slf.state = State::CommentEnd;                  Ok(ControlToken::Continue) @@ -1086,7 +1073,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInComment); +                slf.emit_error(Error::EofInComment);                  slf.emitter.emit_current_comment();                  Ok(ControlToken::Eof)              } @@ -1112,7 +1099,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInComment); +                slf.emit_error(Error::EofInComment);                  slf.emitter.emit_current_comment();                  Ok(ControlToken::Eof)              } @@ -1133,13 +1120,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('>') => { -                slf.emitter.emit_error(Error::IncorrectlyClosedComment); +                slf.emit_error(Error::IncorrectlyClosedComment);                  slf.state = State::Data;                  slf.emitter.emit_current_comment();                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInComment); +                slf.emit_error(Error::EofInComment);                  slf.emitter.emit_current_comment();                  Ok(ControlToken::Eof)              } @@ -1163,15 +1150,14 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInDoctype); +                slf.emit_error(Error::EofInDoctype);                  slf.emitter.init_doctype(&slf.reader);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Eof)              }              c @ Some(_) => { -                slf.emitter -                    .emit_error(Error::MissingWhitespaceBeforeDoctypeName); +                slf.emit_error(Error::MissingWhitespaceBeforeDoctypeName);                  slf.unread_char(c);                  slf.state = State::BeforeDoctypeName;                  Ok(ControlToken::Continue) @@ -1180,14 +1166,14 @@ pub fn consume<R: Reader, E: Emitter<R>>(          State::BeforeDoctypeName => match slf.read_char()? {              Some(whitespace_pat!()) => Ok(ControlToken::Continue),              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.init_doctype(&slf.reader);                  slf.emitter.push_doctype_name("\u{fffd}");                  slf.state = State::DoctypeName;                  Ok(ControlToken::Continue)              }              Some('>') => { -                slf.emitter.emit_error(Error::MissingDoctypeName); +                slf.emit_error(Error::MissingDoctypeName);                  slf.emitter.init_doctype(&slf.reader);                  slf.emitter.set_force_quirks();                  slf.state = State::Data; @@ -1195,7 +1181,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInDoctype); +                slf.emit_error(Error::EofInDoctype);                  slf.emitter.init_doctype(&slf.reader);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype(); @@ -1220,12 +1206,12 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.push_doctype_name("\u{fffd}");                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInDoctype); +                slf.emit_error(Error::EofInDoctype);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Eof) @@ -1244,7 +1230,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInDoctype); +                slf.emit_error(Error::EofInDoctype);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Eof) @@ -1258,8 +1244,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              c @ Some(_) => { -                slf.emitter -                    .emit_error(Error::InvalidCharacterSequenceAfterDoctypeName); +                slf.emit_error(Error::InvalidCharacterSequenceAfterDoctypeName);                  slf.emitter.set_force_quirks();                  slf.unread_char(c);                  slf.state = State::BogusDoctype; @@ -1272,36 +1257,32 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('"') => { -                slf.emitter -                    .emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword); +                slf.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);                  slf.emitter.set_doctype_public_identifier("");                  slf.state = State::DoctypePublicIdentifierDoubleQuoted;                  Ok(ControlToken::Continue)              }              Some('\'') => { -                slf.emitter -                    .emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword); +                slf.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);                  slf.emitter.set_doctype_public_identifier("");                  slf.state = State::DoctypePublicIdentifierSingleQuoted;                  Ok(ControlToken::Continue)              }              Some('>') => { -                slf.emitter -                    .emit_error(Error::MissingDoctypePublicIdentifier); +                slf.emit_error(Error::MissingDoctypePublicIdentifier);                  slf.emitter.set_force_quirks();                  slf.state = State::Data;                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInDoctype); +                slf.emit_error(Error::EofInDoctype);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Eof)              }              c @ Some(_) => { -                slf.emitter -                    .emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier); +                slf.emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier);                  slf.emitter.set_force_quirks();                  slf.unread_char(c);                  slf.state = State::BogusDoctype; @@ -1321,22 +1302,20 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('>') => { -                slf.emitter -                    .emit_error(Error::MissingDoctypePublicIdentifier); +                slf.emit_error(Error::MissingDoctypePublicIdentifier);                  slf.emitter.set_force_quirks();                  slf.state = State::Data;                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInDoctype); +                slf.emit_error(Error::EofInDoctype);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Eof)              }              c @ Some(_) => { -                slf.emitter -                    .emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier); +                slf.emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier);                  slf.emitter.set_force_quirks();                  slf.unread_char(c);                  slf.state = State::BogusDoctype; @@ -1349,19 +1328,19 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.push_doctype_public_identifier("\u{fffd}");                  Ok(ControlToken::Continue)              }              Some('>') => { -                slf.emitter.emit_error(Error::AbruptDoctypePublicIdentifier); +                slf.emit_error(Error::AbruptDoctypePublicIdentifier);                  slf.emitter.set_force_quirks();                  slf.state = State::Data;                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInDoctype); +                slf.emit_error(Error::EofInDoctype);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Eof) @@ -1377,19 +1356,19 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.push_doctype_public_identifier("\u{fffd}");                  Ok(ControlToken::Continue)              }              Some('>') => { -                slf.emitter.emit_error(Error::AbruptDoctypePublicIdentifier); +                slf.emit_error(Error::AbruptDoctypePublicIdentifier);                  slf.emitter.set_force_quirks();                  slf.state = State::Data;                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInDoctype); +                slf.emit_error(Error::EofInDoctype);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Eof) @@ -1410,28 +1389,25 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('"') => { -                slf.emitter -                    .emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers); +                slf.emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);                  slf.emitter.set_doctype_system_identifier("");                  slf.state = State::DoctypeSystemIdentifierDoubleQuoted;                  Ok(ControlToken::Continue)              }              Some('\'') => { -                slf.emitter -                    .emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers); +                slf.emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);                  slf.emitter.set_doctype_system_identifier("");                  slf.state = State::DoctypeSystemIdentifierSingleQuoted;                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInDoctype); +                slf.emit_error(Error::EofInDoctype);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Eof)              }              c @ Some(_) => { -                slf.emitter -                    .emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier); +                slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);                  slf.emitter.set_force_quirks();                  slf.unread_char(c);                  slf.state = State::BogusDoctype; @@ -1456,14 +1432,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInDoctype); +                slf.emit_error(Error::EofInDoctype);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Eof)              }              c @ Some(_) => { -                slf.emitter -                    .emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier); +                slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);                  slf.emitter.set_force_quirks();                  slf.state = State::BogusDoctype;                  slf.unread_char(c); @@ -1476,36 +1451,32 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('"') => { -                slf.emitter -                    .emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword); +                slf.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);                  slf.emitter.set_doctype_system_identifier("");                  slf.state = State::DoctypeSystemIdentifierDoubleQuoted;                  Ok(ControlToken::Continue)              }              Some('\'') => { -                slf.emitter -                    .emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword); +                slf.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);                  slf.emitter.set_doctype_system_identifier("");                  slf.state = State::DoctypeSystemIdentifierSingleQuoted;                  Ok(ControlToken::Continue)              }              Some('>') => { -                slf.emitter -                    .emit_error(Error::MissingDoctypeSystemIdentifier); +                slf.emit_error(Error::MissingDoctypeSystemIdentifier);                  slf.emitter.set_force_quirks();                  slf.state = State::Data;                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInDoctype); +                slf.emit_error(Error::EofInDoctype);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Eof)              }              c @ Some(_) => { -                slf.emitter -                    .emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier); +                slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);                  slf.emitter.set_force_quirks();                  slf.state = State::BogusDoctype;                  slf.unread_char(c); @@ -1525,22 +1496,20 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('>') => { -                slf.emitter -                    .emit_error(Error::MissingDoctypeSystemIdentifier); +                slf.emit_error(Error::MissingDoctypeSystemIdentifier);                  slf.emitter.set_force_quirks();                  slf.state = State::Data;                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInDoctype); +                slf.emit_error(Error::EofInDoctype);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Eof)              }              c @ Some(_) => { -                slf.emitter -                    .emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier); +                slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);                  slf.emitter.set_force_quirks();                  slf.state = State::BogusDoctype;                  slf.unread_char(c); @@ -1553,19 +1522,19 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.push_doctype_system_identifier("\u{fffd}");                  Ok(ControlToken::Continue)              }              Some('>') => { -                slf.emitter.emit_error(Error::AbruptDoctypeSystemIdentifier); +                slf.emit_error(Error::AbruptDoctypeSystemIdentifier);                  slf.emitter.set_force_quirks();                  slf.state = State::Data;                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInDoctype); +                slf.emit_error(Error::EofInDoctype);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Eof) @@ -1581,19 +1550,19 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  slf.emitter.push_doctype_system_identifier("\u{fffd}");                  Ok(ControlToken::Continue)              }              Some('>') => { -                slf.emitter.emit_error(Error::AbruptDoctypeSystemIdentifier); +                slf.emit_error(Error::AbruptDoctypeSystemIdentifier);                  slf.emitter.set_force_quirks();                  slf.state = State::Data;                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInDoctype); +                slf.emit_error(Error::EofInDoctype);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Eof) @@ -1611,14 +1580,13 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInDoctype); +                slf.emit_error(Error::EofInDoctype);                  slf.emitter.set_force_quirks();                  slf.emitter.emit_current_doctype();                  Ok(ControlToken::Eof)              }              c @ Some(_) => { -                slf.emitter -                    .emit_error(Error::UnexpectedCharacterAfterDoctypeSystemIdentifier); +                slf.emit_error(Error::UnexpectedCharacterAfterDoctypeSystemIdentifier);                  slf.unread_char(c);                  slf.state = State::BogusDoctype;                  Ok(ControlToken::Continue) @@ -1631,7 +1599,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              Some('\0') => { -                slf.emitter.emit_error(Error::UnexpectedNullCharacter); +                slf.emit_error(Error::UnexpectedNullCharacter);                  Ok(ControlToken::Continue)              }              None => { @@ -1646,7 +1614,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              None => { -                slf.emitter.emit_error(Error::EofInCdata); +                slf.emit_error(Error::EofInCdata);                  Ok(ControlToken::Eof)              }              Some(x) => { @@ -1728,8 +1696,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                      Ok(ControlToken::Continue)                  } else {                      if char_ref_name_last_character != Some(';') { -                        slf.emitter -                            .emit_error(Error::MissingSemicolonAfterCharacterReference); +                        slf.emit_error(Error::MissingSemicolonAfterCharacterReference);                      }                      slf.temporary_buffer.clear(); @@ -1756,8 +1723,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              c @ Some(';') => { -                slf.emitter -                    .emit_error(Error::UnknownNamedCharacterReference); +                slf.emit_error(Error::UnknownNamedCharacterReference);                  slf.unread_char(c);                  slf.state = slf.return_state.take().unwrap();                  Ok(ControlToken::Continue) @@ -1790,8 +1756,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter -                    .emit_error(Error::AbsenceOfDigitsInNumericCharacterReference); +                slf.emit_error(Error::AbsenceOfDigitsInNumericCharacterReference);                  slf.flush_code_points_consumed_as_character_reference();                  slf.unread_char(c);                  slf.state = slf.return_state.take().unwrap(); @@ -1805,8 +1770,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter -                    .emit_error(Error::AbsenceOfDigitsInNumericCharacterReference); +                slf.emit_error(Error::AbsenceOfDigitsInNumericCharacterReference);                  slf.flush_code_points_consumed_as_character_reference();                  slf.unread_char(c);                  slf.state = slf.return_state.take().unwrap(); @@ -1831,8 +1795,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter -                    .emit_error(Error::MissingSemicolonAfterCharacterReference); +                slf.emit_error(Error::MissingSemicolonAfterCharacterReference);                  slf.unread_char(c);                  slf.state = State::NumericCharacterReferenceEnd;                  Ok(ControlToken::Continue) @@ -1848,8 +1811,7 @@ pub fn consume<R: Reader, E: Emitter<R>>(                  Ok(ControlToken::Continue)              }              c => { -                slf.emitter -                    .emit_error(Error::MissingSemicolonAfterCharacterReference); +                slf.emit_error(Error::MissingSemicolonAfterCharacterReference);                  slf.unread_char(c);                  slf.state = State::NumericCharacterReferenceEnd;                  Ok(ControlToken::Continue) @@ -1858,28 +1820,26 @@ pub fn consume<R: Reader, E: Emitter<R>>(          State::NumericCharacterReferenceEnd => {              match slf.character_reference_code {                  0x00 => { -                    slf.emitter.emit_error(Error::NullCharacterReference); +                    slf.emit_error(Error::NullCharacterReference);                      slf.character_reference_code = 0xfffd;                  }                  0x110000.. => { -                    slf.emitter -                        .emit_error(Error::CharacterReferenceOutsideUnicodeRange); +                    slf.emit_error(Error::CharacterReferenceOutsideUnicodeRange);                      slf.character_reference_code = 0xfffd;                  }                  surrogate_pat!() => { -                    slf.emitter.emit_error(Error::SurrogateCharacterReference); +                    slf.emit_error(Error::SurrogateCharacterReference);                      slf.character_reference_code = 0xfffd;                  }                  // noncharacter                  noncharacter_pat!() => { -                    slf.emitter -                        .emit_error(Error::NoncharacterCharacterReference); +                    slf.emit_error(Error::NoncharacterCharacterReference);                  }                  // 0x000d, or a control that is not whitespace                  x @ 0x000d | x @ control_pat!()                      if !matches!(x, 0x0009 | 0x000a | 0x000c | 0x0020) =>                  { -                    slf.emitter.emit_error(Error::ControlCharacterReference); +                    slf.emit_error(Error::ControlCharacterReference);                      slf.character_reference_code = match x {                          0x80 => 0x20AC, // EURO SIGN (€)                          0x82 => 0x201A, // SINGLE LOW-9 QUOTATION MARK (‚) diff --git a/src/spans.rs b/src/spans.rs index 6d7c18e..85a64a9 100644 --- a/src/spans.rs +++ b/src/spans.rs @@ -62,6 +62,7 @@ pub struct SpanEmitter<R> {      seen_attributes: BTreeSet<String>,      emitted_tokens: VecDeque<Token<Span>>,      reader: PhantomData<R>, +    attr_in_end_tag_span: Span,  }  impl<R> Default for SpanEmitter<R> { @@ -74,6 +75,7 @@ impl<R> Default for SpanEmitter<R> {              seen_attributes: BTreeSet::new(),              emitted_tokens: VecDeque::new(),              reader: PhantomData::default(), +            attr_in_end_tag_span: Span::default(),          }      }  } @@ -91,18 +93,19 @@ impl<R: GetPos> SpanEmitter<R> {                      let mut error = None;                      tag.attributes                          .entry(k) -                        .and_modify(|_| { -                            error = Some(Error::DuplicateAttribute); +                        .and_modify(|a| { +                            error = Some((Error::DuplicateAttribute, a.name_span.clone()));                          })                          .or_insert(v); -                    if let Some(e) = error { -                        self.emit_error(e); +                    if let Some((e, span)) = error { +                        self.emit_error_span(e, span);                      }                  }                  Some(Token::EndTag(_)) => { +                    self.attr_in_end_tag_span = v.name_span.clone();                      if !self.seen_attributes.insert(k) { -                        self.emit_error(Error::DuplicateAttribute); +                        self.emit_error_span(Error::DuplicateAttribute, v.name_span);                      }                  }                  _ => { @@ -120,6 +123,12 @@ impl<R: GetPos> SpanEmitter<R> {          let s = mem::take(&mut self.current_characters);          self.emit_token(Token::String(s));      } + +    fn emit_error_span(&mut self, error: Error, span: Span) { +        // bypass character flushing in self.emit_token: we don't need the error location to be +        // that exact +        self.emitted_tokens.push_front(Token::Error { error, span }); +    }  }  impl<R: GetPos> Emitter<R> for SpanEmitter<R> { @@ -135,10 +144,8 @@ impl<R: GetPos> Emitter<R> for SpanEmitter<R> {          self.flush_current_characters();      } -    fn emit_error(&mut self, error: Error) { -        // bypass character flushing in self.emit_token: we don't need the error location to be -        // that exact -        self.emitted_tokens.push_front(Token::Error(error)); +    fn emit_error(&mut self, error: Error, reader: &R) { +        self.emit_error_span(error, reader.get_pos() - 1..reader.get_pos() - 1)      }      fn pop_token(&mut self) -> Option<Self::Token> { @@ -172,7 +179,10 @@ impl<R: GetPos> Emitter<R> for SpanEmitter<R> {          match token {              Token::EndTag(_) => {                  if !self.seen_attributes.is_empty() { -                    self.emit_error(Error::EndTagWithAttributes); +                    self.emit_error_span( +                        Error::EndTagWithAttributes, +                        self.attr_in_end_tag_span.clone(), +                    );                  }                  self.seen_attributes.clear();              } @@ -195,7 +205,7 @@ impl<R: GetPos> Emitter<R> for SpanEmitter<R> {          self.emit_token(doctype);      } -    fn set_self_closing(&mut self) { +    fn set_self_closing(&mut self, reader: &R) {          let tag = self.current_token.as_mut().unwrap();          match tag {              Token::StartTag(StartTag { @@ -205,7 +215,7 @@ impl<R: GetPos> Emitter<R> for SpanEmitter<R> {                  *self_closing = true;              }              Token::EndTag(_) => { -                self.emit_error(Error::EndTagWithTrailingSolidus); +                self.emit_error(Error::EndTagWithTrailingSolidus, reader);              }              _ => {                  debug_assert!(false); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index efaa870..6e928e9 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -122,6 +122,12 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {          self.state = state.into();      } +    /// Just a helper method for the machine. +    #[inline] +    pub(crate) fn emit_error(&mut self, error: Error) { +        self.emitter.emit_error(error, &self.reader); +    } +      /// Test-internal function to override internal state.      ///      /// Only available with the `integration-tests` feature which is not public API. @@ -139,17 +145,16 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {      fn validate_char(&mut self, c: char) {          match c as u32 {              surrogate_pat!() => { -                self.emitter.emit_error(Error::SurrogateInInputStream); +                self.emit_error(Error::SurrogateInInputStream);              }              noncharacter_pat!() => { -                self.emitter.emit_error(Error::NoncharacterInInputStream); +                self.emit_error(Error::NoncharacterInInputStream);              }              // control without whitespace or nul              x @ control_pat!()                  if !matches!(x, 0x0000 | 0x0009 | 0x000a | 0x000c | 0x000d | 0x0020) =>              { -                self.emitter -                    .emit_error(Error::ControlCharacterInInputStream); +                self.emit_error(Error::ControlCharacterInInputStream);              }              _ => (),          } diff --git a/tests/span-tests/demo.html b/tests/span-tests/demo.html index c635846..07c305b 100644 --- a/tests/span-tests/demo.html +++ b/tests/span-tests/demo.html @@ -3,3 +3,13 @@ this is a tag: <h1>test</h1>  tags can have attributes: <div id = foobar>  Attribute values can be quoted: <input name = 'age' type = "number"> + +But you cannot put attributes everywhere: </nope data=foobar> + +Please mind the gap: < test + +The pirate says &arrrrr; + +Does this open two pages? <a href=foo.html href=bar.html>click me</a> + +Do you start or do you end? </yes/> diff --git a/tests/span-tests/demo.out b/tests/span-tests/demo.out index ad9cfb8..37ab8be 100644 --- a/tests/span-tests/demo.out +++ b/tests/span-tests/demo.out @@ -1,19 +1,29 @@  note: -  ┌─ test.html:1:1 -  │ -1 │ this is a tag: <h1>test</h1> -  │                 ^^       ^^ end tag -  │                 │ -  │                 start tag -2 │ -3 │ tags can have attributes: <div id = foobar> -  │                                ^^   ^^^^^^ attr value -  │                                │ -  │                                attr name -4 │ -5 │ Attribute values can be quoted: <input name = 'age' type = "number"> -  │                                        ^^^^    ^^^  ^^^^    ^^^^^^ attr value -  │                                        │       │    │ -  │                                        │       │    attr name -  │                                        │       attr value -  │                                        attr name +   ┌─ test.html:1:17 +   │ + 1 │ this is a tag: <h1>test</h1> +   │                 ^^       ^^ end tag +   │                 │ +   │                 start tag +   · + 5 │ Attribute values can be quoted: <input name = 'age' type = "number"> +   │                                        ^^^^    ^^^  ^^^^    ^^^^^^ attr value +   │                                        │       │    │ +   │                                        │       │    attr name +   │                                        │       attr value +   │                                        attr name + 6 │ + 7 │ But you cannot put attributes everywhere: </nope data=foobar> +   │                                                  ^^^^ end-tag-with-attributes + 8 │ + 9 │ Please mind the gap: < test +   │                       ^ invalid-first-character-of-tag-name +10 │ +11 │ The pirate says &arrrrr; +   │                        ^ unknown-named-character-reference +12 │ +13 │ Does this open two pages? <a href=foo.html href=bar.html>click me</a> +   │                              ^^^^ duplicate-attribute +14 │ +15 │ Do you start or do you end? </yes/> +   │                                   ^ end-tag-with-trailing-solidus diff --git a/tests/test_html5lib.rs b/tests/test_html5lib.rs index 662f3c5..cd3785f 100644 --- a/tests/test_html5lib.rs +++ b/tests/test_html5lib.rs @@ -296,9 +296,9 @@ fn run_test_inner<R: Reader>(      for token in tokenizer {          let token = token.unwrap(); -        if let Token::Error(e) = token { +        if let Token::Error { error, .. } = token {              actual_errors.push(ParseError { -                code: ParseErrorInner(e), +                code: ParseErrorInner(error),              });          } else {              actual_tokens.push(token); diff --git a/tests/test_spans.rs b/tests/test_spans.rs index 9cc745c..aeb4a94 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -31,7 +31,7 @@ fn test() {          if let Token::StartTag(tag) = token {              if tag.name == "h1" {                  labels.push(Label::primary(file_id, tag.name_span).with_message("start tag")); -            } else { +            } else if tag.name == "input" {                  for attr in tag.attributes.values() {                      labels.push(                          Label::primary(file_id, attr.name_span.clone()).with_message("attr name"), @@ -45,6 +45,8 @@ fn test() {              if tag.name == "h1" {                  labels.push(Label::primary(file_id, tag.name_span).with_message("end tag"));              } +        } else if let Token::Error { error, span } = token { +            labels.push(Label::primary(file_id, span).with_message(error.to_string()));          }      } | 
