diff options
author | Martin Fischer <martin@push-f.com> | 2023-08-17 17:25:32 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-08-19 13:41:55 +0200 |
commit | 378662fa52bbc3e9e4a210f649093dcdadf51afa (patch) | |
tree | 44168be8588b8c17fc920839bc93595ea0152aea | |
parent | e34083e64b764df076c1ef9ec6bf1102b9fbf748 (diff) |
feat!: add span and offsets to Doctype
-rw-r--r-- | src/emitter.rs | 51 | ||||
-rw-r--r-- | src/machine.rs | 119 | ||||
-rw-r--r-- | src/tokenizer.rs | 8 | ||||
-rw-r--r-- | tests/test_spans.rs | 31 |
4 files changed, 134 insertions, 75 deletions
diff --git a/src/emitter.rs b/src/emitter.rs index f665f47..5b64acd 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -69,12 +69,12 @@ pub trait Emitter<O> { /// Emit the _current token_, assuming it is a comment. /// /// If the current token is not a comment, this method may panic. - fn emit_current_comment(&mut self); + fn emit_current_comment(&mut self, offset: O); /// Emit the _current token_, assuming it is a doctype. /// /// If the current token is not a doctype, this method may panic. - fn emit_current_doctype(&mut self); + fn emit_current_doctype(&mut self, offset: O); /// Assuming the _current token_ is a start tag, set the self-closing flag. /// @@ -140,12 +140,12 @@ pub trait Emitter<O> { /// Assuming the _current token_ is a doctype, set its "public identifier" to the empty string. /// /// If the current token is not a doctype, this method may panic. - fn init_doctype_public_id(&mut self); + fn init_doctype_public_id(&mut self, offset: O); /// Assuming the _current token_ is a doctype, set its "system identifier" to the empty string. /// /// If the current token is not a doctype, this method may panic. - fn init_doctype_system_id(&mut self); + fn init_doctype_system_id(&mut self, offset: O); /// Assuming the _current token_ is a doctype, append a string to its "public identifier" to the given string. 
/// @@ -308,17 +308,18 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { } self.emit_token(token); } - fn emit_current_comment(&mut self) { + fn emit_current_comment(&mut self, _offset: O) { let comment = self.current_token.take().unwrap(); debug_assert!(matches!(comment, Token::Comment(_))); self.emit_token(comment); } - fn emit_current_doctype(&mut self) { + fn emit_current_doctype(&mut self, offset: O) { let Some(Token::Doctype(mut doctype)) = self.current_token.take() else { debug_assert!(false); return; }; + doctype.span.end = offset; self.emit_token(Token::Doctype(doctype)); } @@ -370,12 +371,15 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { _ => debug_assert!(false), } } - fn init_doctype(&mut self, _offset: O) { + fn init_doctype(&mut self, offset: O) { self.current_token = Some(Token::Doctype(Doctype { name: String::new(), force_quirks: false, public_id: None, system_id: None, + span: offset..O::default(), + public_id_offset: O::default(), + system_id_offset: O::default(), })); } @@ -405,19 +409,21 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { let current_attr = self.current_attribute.as_mut().unwrap(); current_attr.1.value.push_str(s); } - fn init_doctype_public_id(&mut self) { + fn init_doctype_public_id(&mut self, offset: O) { let Some(Token::Doctype(doctype)) = &mut self.current_token else { debug_assert!(false); return; }; doctype.public_id = Some("".to_owned()); + doctype.public_id_offset = offset; } - fn init_doctype_system_id(&mut self) { + fn init_doctype_system_id(&mut self, offset: O) { let Some(Token::Doctype(doctype)) = &mut self.current_token else { debug_assert!(false); return; }; doctype.system_id = Some("".to_owned()); + doctype.system_id_offset = offset; } fn push_doctype_public_id(&mut self, s: &str) { if let Some(Token::Doctype(Doctype { @@ -512,7 +518,7 @@ impl<O: Offset> Comment<O> { /// * `<!DOCTYPE {name} SYSTEM '{system_id}'>` /// * `<!DOCTYPE {name} PUBLIC '{public_id}' '{system_id}'>` #[derive(Debug, Eq, 
PartialEq)] -pub struct Doctype { +pub struct Doctype<O> { /// The ["force quirks"](https://html.spec.whatwg.org/#force-quirks-flag) flag. pub force_quirks: bool, @@ -524,6 +530,29 @@ pub struct Doctype { /// The doctype's system identifier. pub system_id: Option<String>, + + /// The source code span of the doctype. + pub span: Range<O>, + + /// The source offset of the public identifier. + public_id_offset: O, + + /// The source offset of the system identifier. + system_id_offset: O, +} + +impl<O: Offset> Doctype<O> { + /// Calculates the span of the public identifier and returns it. + pub fn public_id_span(&self) -> Option<Range<O>> { + let public_id = self.public_id.as_ref()?; + Some(self.public_id_offset..self.public_id_offset + public_id.len()) + } + + /// Calculates the span of the system identifier and returns it. + pub fn system_id_span(&self) -> Option<Range<O>> { + let system_id = self.system_id.as_ref()?; + Some(self.system_id_offset..self.system_id_offset + system_id.len()) + } } /// The token type used by default. You can define your own token type by implementing the @@ -539,7 +568,7 @@ pub enum Token<O> { /// A HTML comment. Comment(Comment<O>), /// A HTML doctype declaration. - Doctype(Doctype), + Doctype(Doctype<O>), /// A HTML parsing error. /// /// Can be skipped over, the tokenizer is supposed to recover from the error and continues with diff --git a/src/machine.rs b/src/machine.rs index 0755e20..0d99ab8 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -910,11 +910,11 @@ where State::BogusComment => match slf.read_char()? { Some('>') => { slf.state = State::Data; - slf.emitter.emit_current_comment(); + slf.emitter.emit_current_comment(slf.reader.position()); Ok(ControlToken::Continue) } None => { - slf.emitter.emit_current_comment(); + slf.emitter.emit_current_comment(slf.reader.position()); Ok(ControlToken::Eof) } Some('\0') => { @@ -935,6 +935,7 @@ where } Some('d' | 'D') if slf.try_read_string("octype", false)? 
=> { slf.state = State::Doctype; + slf.doctype_offset = slf.reader.position() - b"<!doctype".len(); Ok(ControlToken::Continue) } Some('[') if slf.try_read_string("CDATA[", true)? => { @@ -967,7 +968,7 @@ where Some('>') => { slf.emit_error(Error::AbruptClosingOfEmptyComment); slf.state = State::Data; - slf.emitter.emit_current_comment(); + slf.emitter.emit_current_comment(slf.reader.position()); Ok(ControlToken::Continue) } c => { @@ -984,12 +985,12 @@ where Some('>') => { slf.emit_error(Error::AbruptClosingOfEmptyComment); slf.state = State::Data; - slf.emitter.emit_current_comment(); + slf.emitter.emit_current_comment(slf.reader.position()); Ok(ControlToken::Continue) } None => { slf.emit_error(Error::EofInComment); - slf.emitter.emit_current_comment(); + slf.emitter.emit_current_comment(slf.reader.position()); Ok(ControlToken::Eof) } c @ Some(_) => { @@ -1016,7 +1017,7 @@ where } None => { slf.emit_error(Error::EofInComment); - slf.emitter.emit_current_comment(); + slf.emitter.emit_current_comment(slf.reader.position()); Ok(ControlToken::Eof) } Some(x) => { @@ -1082,7 +1083,7 @@ where } None => { slf.emit_error(Error::EofInComment); - slf.emitter.emit_current_comment(); + slf.emitter.emit_current_comment(slf.reader.position()); Ok(ControlToken::Eof) } c => { @@ -1095,7 +1096,7 @@ where State::CommentEnd => match slf.read_char()? 
{ Some('>') => { slf.state = State::Data; - slf.emitter.emit_current_comment(); + slf.emitter.emit_current_comment(slf.reader.position()); Ok(ControlToken::Continue) } Some('!') => { @@ -1108,7 +1109,7 @@ where } None => { slf.emit_error(Error::EofInComment); - slf.emitter.emit_current_comment(); + slf.emitter.emit_current_comment(slf.reader.position()); Ok(ControlToken::Eof) } c @ Some(_) => { @@ -1130,12 +1131,12 @@ where Some('>') => { slf.emit_error(Error::IncorrectlyClosedComment); slf.state = State::Data; - slf.emitter.emit_current_comment(); + slf.emitter.emit_current_comment(slf.reader.position()); Ok(ControlToken::Continue) } None => { slf.emit_error(Error::EofInComment); - slf.emitter.emit_current_comment(); + slf.emitter.emit_current_comment(slf.reader.position()); Ok(ControlToken::Eof) } c @ Some(_) => { @@ -1159,9 +1160,9 @@ where } None => { slf.emit_error(Error::EofInDoctype); - slf.emitter.init_doctype(slf.reader.position()); + slf.emitter.init_doctype(slf.doctype_offset); slf.emitter.set_force_quirks(); - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } c @ Some(_) => { @@ -1175,28 +1176,28 @@ where Some(whitespace_pat!()) => Ok(ControlToken::Continue), Some('\0') => { slf.emit_error(Error::UnexpectedNullCharacter); - slf.emitter.init_doctype(slf.reader.position()); + slf.emitter.init_doctype(slf.doctype_offset); slf.emitter.push_doctype_name("\u{fffd}"); slf.state = State::DoctypeName; Ok(ControlToken::Continue) } Some('>') => { slf.emit_error(Error::MissingDoctypeName); - slf.emitter.init_doctype(slf.reader.position()); + slf.emitter.init_doctype(slf.doctype_offset); slf.emitter.set_force_quirks(); slf.state = State::Data; - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) } None => { slf.emit_error(Error::EofInDoctype); - slf.emitter.init_doctype(slf.reader.position()); + 
slf.emitter.init_doctype(slf.doctype_offset); slf.emitter.set_force_quirks(); - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } Some(x) => { - slf.emitter.init_doctype(slf.reader.position()); + slf.emitter.init_doctype(slf.doctype_offset); slf.emitter .push_doctype_name(ctostr!(x.to_ascii_lowercase())); slf.state = State::DoctypeName; @@ -1210,7 +1211,7 @@ where } Some('>') => { slf.state = State::Data; - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) } Some('\0') => { @@ -1221,7 +1222,7 @@ where None => { slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } Some(x) => { @@ -1234,13 +1235,13 @@ where Some(whitespace_pat!()) => Ok(ControlToken::Continue), Some('>') => { slf.state = State::Data; - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) } None => { slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } Some('p' | 'P') if slf.try_read_string("ublic", false)? 
=> { @@ -1266,13 +1267,13 @@ where } Some('"') => { slf.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword); - slf.emitter.init_doctype_public_id(); + slf.emitter.init_doctype_public_id(slf.reader.position()); slf.state = State::DoctypePublicIdentifierDoubleQuoted; Ok(ControlToken::Continue) } Some('\'') => { slf.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword); - slf.emitter.init_doctype_public_id(); + slf.emitter.init_doctype_public_id(slf.reader.position()); slf.state = State::DoctypePublicIdentifierSingleQuoted; Ok(ControlToken::Continue) } @@ -1280,13 +1281,13 @@ where slf.emit_error(Error::MissingDoctypePublicIdentifier); slf.emitter.set_force_quirks(); slf.state = State::Data; - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) } None => { slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } c @ Some(_) => { @@ -1300,12 +1301,12 @@ where State::BeforeDoctypePublicIdentifier => match slf.read_char()? 
{ Some(whitespace_pat!()) => Ok(ControlToken::Continue), Some('"') => { - slf.emitter.init_doctype_public_id(); + slf.emitter.init_doctype_public_id(slf.reader.position()); slf.state = State::DoctypePublicIdentifierDoubleQuoted; Ok(ControlToken::Continue) } Some('\'') => { - slf.emitter.init_doctype_public_id(); + slf.emitter.init_doctype_public_id(slf.reader.position()); slf.state = State::DoctypePublicIdentifierSingleQuoted; Ok(ControlToken::Continue) } @@ -1313,13 +1314,13 @@ where slf.emit_error(Error::MissingDoctypePublicIdentifier); slf.emitter.set_force_quirks(); slf.state = State::Data; - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) } None => { slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } c @ Some(_) => { @@ -1344,13 +1345,13 @@ where slf.emit_error(Error::AbruptDoctypePublicIdentifier); slf.emitter.set_force_quirks(); slf.state = State::Data; - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) } None => { slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } Some(x) => { @@ -1372,13 +1373,13 @@ where slf.emit_error(Error::AbruptDoctypePublicIdentifier); slf.emitter.set_force_quirks(); slf.state = State::Data; - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) } None => { slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } Some(x) => { @@ -1393,25 +1394,25 @@ where } Some('>') => { slf.state = State::Data; - 
slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) } Some('"') => { slf.emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers); - slf.emitter.init_doctype_system_id(); + slf.emitter.init_doctype_system_id(slf.reader.position()); slf.state = State::DoctypeSystemIdentifierDoubleQuoted; Ok(ControlToken::Continue) } Some('\'') => { slf.emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers); - slf.emitter.init_doctype_system_id(); + slf.emitter.init_doctype_system_id(slf.reader.position()); slf.state = State::DoctypeSystemIdentifierSingleQuoted; Ok(ControlToken::Continue) } None => { slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } c @ Some(_) => { @@ -1426,23 +1427,23 @@ where Some(whitespace_pat!()) => Ok(ControlToken::Continue), Some('>') => { slf.state = State::Data; - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) } Some('"') => { - slf.emitter.init_doctype_system_id(); + slf.emitter.init_doctype_system_id(slf.reader.position()); slf.state = State::DoctypeSystemIdentifierDoubleQuoted; Ok(ControlToken::Continue) } Some('\'') => { - slf.emitter.init_doctype_system_id(); + slf.emitter.init_doctype_system_id(slf.reader.position()); slf.state = State::DoctypeSystemIdentifierSingleQuoted; Ok(ControlToken::Continue) } None => { slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } c @ Some(_) => { @@ -1460,13 +1461,13 @@ where } Some('"') => { slf.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword); - slf.emitter.init_doctype_system_id(); + slf.emitter.init_doctype_system_id(slf.reader.position()); 
slf.state = State::DoctypeSystemIdentifierDoubleQuoted; Ok(ControlToken::Continue) } Some('\'') => { slf.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword); - slf.emitter.init_doctype_system_id(); + slf.emitter.init_doctype_system_id(slf.reader.position()); slf.state = State::DoctypeSystemIdentifierSingleQuoted; Ok(ControlToken::Continue) } @@ -1474,13 +1475,13 @@ where slf.emit_error(Error::MissingDoctypeSystemIdentifier); slf.emitter.set_force_quirks(); slf.state = State::Data; - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) } None => { slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } c @ Some(_) => { @@ -1494,12 +1495,12 @@ where State::BeforeDoctypeSystemIdentifier => match slf.read_char()? { Some(whitespace_pat!()) => Ok(ControlToken::Continue), Some('"') => { - slf.emitter.init_doctype_system_id(); + slf.emitter.init_doctype_system_id(slf.reader.position()); slf.state = State::DoctypeSystemIdentifierDoubleQuoted; Ok(ControlToken::Continue) } Some('\'') => { - slf.emitter.init_doctype_system_id(); + slf.emitter.init_doctype_system_id(slf.reader.position()); slf.state = State::DoctypeSystemIdentifierSingleQuoted; Ok(ControlToken::Continue) } @@ -1507,13 +1508,13 @@ where slf.emit_error(Error::MissingDoctypeSystemIdentifier); slf.emitter.set_force_quirks(); slf.state = State::Data; - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) } None => { slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } c @ Some(_) => { @@ -1538,13 +1539,13 @@ where slf.emit_error(Error::AbruptDoctypeSystemIdentifier); slf.emitter.set_force_quirks(); 
slf.state = State::Data; - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) } None => { slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } Some(x) => { @@ -1566,13 +1567,13 @@ where slf.emit_error(Error::AbruptDoctypeSystemIdentifier); slf.emitter.set_force_quirks(); slf.state = State::Data; - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) } None => { slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } Some(x) => { @@ -1584,13 +1585,13 @@ where Some(whitespace_pat!()) => Ok(ControlToken::Continue), Some('>') => { slf.state = State::Data; - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) } None => { slf.emit_error(Error::EofInDoctype); slf.emitter.set_force_quirks(); - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } c @ Some(_) => { @@ -1603,7 +1604,7 @@ where State::BogusDoctype => match slf.read_char()? 
{ Some('>') => { slf.state = State::Data; - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Continue) } Some('\0') => { @@ -1611,7 +1612,7 @@ where Ok(ControlToken::Continue) } None => { - slf.emitter.emit_current_doctype(); + slf.emitter.emit_current_doctype(slf.reader.position()); Ok(ControlToken::Eof) } Some(_) => Ok(ControlToken::Continue), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 1b80ec3..d272b14 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,5 +1,3 @@ -use std::marker::PhantomData; - use crate::machine; use crate::offset::{NoopOffset, Offset, Position}; use crate::reader::{IntoReader, Reader}; @@ -41,16 +39,16 @@ pub struct Tokenizer<R: Reader, O = NoopOffset, E: Emitter<O> = DefaultEmitter<O pub(crate) emitter: E, pub(crate) temporary_buffer: String, pub(crate) reader: R, - _offset: PhantomData<O>, to_reconsume: Stack2<Option<char>>, pub(crate) character_reference_code: u32, pub(crate) return_state: Option<InternalState>, current_tag_name: String, last_start_tag_name: String, is_start_tag: bool, + pub(crate) doctype_offset: O, } -impl<R: Reader, O, E: Emitter<O>> Tokenizer<R, O, E> { +impl<R: Reader, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> { /// Creates a new tokenizer from some input and an emitter. 
/// /// TODO: add warning about you needing to do the state switching @@ -58,7 +56,6 @@ impl<R: Reader, O, E: Emitter<O>> Tokenizer<R, O, E> { Tokenizer { reader: reader.into_reader(), emitter, - _offset: PhantomData, state: InternalState::Data, to_reconsume: Stack2::default(), return_state: None, @@ -68,6 +65,7 @@ impl<R: Reader, O, E: Emitter<O>> Tokenizer<R, O, E> { current_tag_name: String::new(), last_start_tag_name: String::new(), is_start_tag: false, + doctype_offset: O::default(), } } } diff --git a/tests/test_spans.rs b/tests/test_spans.rs index 6bd9378..70bcf6e 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -167,6 +167,37 @@ fn comment_bogus_data_span() { "###); } +#[test] +fn doctype_span() { + let html = r#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd" >"#; + let Token::Doctype(doctype) = tokenizer(html).next().unwrap() else { + panic!("expected doctype"); + }; + let labels = vec![(doctype.span, "")]; + assert_snapshot!(annotate(html, labels), @r###" + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd" > + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + "###); +} + +#[test] +fn doctype_id_spans() { + let html = r#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">"#; + let Token::Doctype(doctype) = tokenizer(html).next().unwrap() else { + panic!("expected doctype"); + }; + let labels = vec![ + (doctype.public_id_span().unwrap(), "public id"), + (doctype.system_id_span().unwrap(), "system id"), + ]; + assert_snapshot!(annotate(html, labels), @r###" + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> + ^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ system id + │ + public id + "###); +} + fn annotate_errors(html: &'static str) -> String { let mut labels = Vec::new(); for token in tokenizer(html) { |