diff options
author | Martin Fischer <martin@push-f.com> | 2023-08-17 17:25:32 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-08-19 13:41:55 +0200 |
commit | 378662fa52bbc3e9e4a210f649093dcdadf51afa (patch) | |
tree | 44168be8588b8c17fc920839bc93595ea0152aea /src/emitter.rs | |
parent | e34083e64b764df076c1ef9ec6bf1102b9fbf748 (diff) |
feat!: add span and offsets to Doctype
Diffstat (limited to 'src/emitter.rs')
-rw-r--r-- | src/emitter.rs | 51 |
1 files changed, 40 insertions, 11 deletions
diff --git a/src/emitter.rs b/src/emitter.rs index f665f47..5b64acd 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -69,12 +69,12 @@ pub trait Emitter<O> { /// Emit the _current token_, assuming it is a comment. /// /// If the current token is not a comment, this method may panic. - fn emit_current_comment(&mut self); + fn emit_current_comment(&mut self, offset: O); /// Emit the _current token_, assuming it is a doctype. /// /// If the current token is not a doctype, this method may panic. - fn emit_current_doctype(&mut self); + fn emit_current_doctype(&mut self, offset: O); /// Assuming the _current token_ is a start tag, set the self-closing flag. /// @@ -140,12 +140,12 @@ pub trait Emitter<O> { /// Assuming the _current token_ is a doctype, set its "public identifier" to the empty string. /// /// If the current token is not a doctype, this method may panic. - fn init_doctype_public_id(&mut self); + fn init_doctype_public_id(&mut self, offset: O); /// Assuming the _current token_ is a doctype, set its "system identifier" to the empty string. /// /// If the current token is not a doctype, this method may panic. - fn init_doctype_system_id(&mut self); + fn init_doctype_system_id(&mut self, offset: O); /// Assuming the _current token_ is a doctype, append a string to its "public identifier" to the given string. 
/// @@ -308,17 +308,18 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { } self.emit_token(token); } - fn emit_current_comment(&mut self) { + fn emit_current_comment(&mut self, _offset: O) { let comment = self.current_token.take().unwrap(); debug_assert!(matches!(comment, Token::Comment(_))); self.emit_token(comment); } - fn emit_current_doctype(&mut self) { + fn emit_current_doctype(&mut self, offset: O) { let Some(Token::Doctype(mut doctype)) = self.current_token.take() else { debug_assert!(false); return; }; + doctype.span.end = offset; self.emit_token(Token::Doctype(doctype)); } @@ -370,12 +371,15 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { _ => debug_assert!(false), } } - fn init_doctype(&mut self, _offset: O) { + fn init_doctype(&mut self, offset: O) { self.current_token = Some(Token::Doctype(Doctype { name: String::new(), force_quirks: false, public_id: None, system_id: None, + span: offset..O::default(), + public_id_offset: O::default(), + system_id_offset: O::default(), })); } @@ -405,19 +409,21 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> { let current_attr = self.current_attribute.as_mut().unwrap(); current_attr.1.value.push_str(s); } - fn init_doctype_public_id(&mut self) { + fn init_doctype_public_id(&mut self, offset: O) { let Some(Token::Doctype(doctype)) = &mut self.current_token else { debug_assert!(false); return; }; doctype.public_id = Some("".to_owned()); + doctype.public_id_offset = offset; } - fn init_doctype_system_id(&mut self) { + fn init_doctype_system_id(&mut self, offset: O) { let Some(Token::Doctype(doctype)) = &mut self.current_token else { debug_assert!(false); return; }; doctype.system_id = Some("".to_owned()); + doctype.system_id_offset = offset; } fn push_doctype_public_id(&mut self, s: &str) { if let Some(Token::Doctype(Doctype { @@ -512,7 +518,7 @@ impl<O: Offset> Comment<O> { /// * `<!DOCTYPE {name} SYSTEM '{system_id}'>` /// * `<!DOCTYPE {name} PUBLIC '{public_id}' '{system_id}'>` #[derive(Debug, Eq, 
PartialEq)] -pub struct Doctype { +pub struct Doctype<O> { /// The ["force quirks"](https://html.spec.whatwg.org/#force-quirks-flag) flag. pub force_quirks: bool, @@ -524,6 +530,29 @@ pub struct Doctype { /// The doctype's system identifier. pub system_id: Option<String>, + + /// The source code span of the doctype. + pub span: Range<O>, + + /// The source offset of the public identifier. + public_id_offset: O, + + /// The source offset of the system identifier. + system_id_offset: O, +} + +impl<O: Offset> Doctype<O> { + /// Calculates the span of the public identifier and returns it. + pub fn public_id_span(&self) -> Option<Range<O>> { + let public_id = self.public_id.as_ref()?; + Some(self.public_id_offset..self.public_id_offset + public_id.len()) + } + + /// Calculates the span of the system identifier and returns it. + pub fn system_id_span(&self) -> Option<Range<O>> { + let system_id = self.system_id.as_ref()?; + Some(self.system_id_offset..self.system_id_offset + system_id.len()) + } } /// The token type used by default. You can define your own token type by implementing the @@ -539,7 +568,7 @@ pub enum Token<O> { /// A HTML comment. Comment(Comment<O>), /// A HTML doctype declaration. - Doctype(Doctype), + Doctype(Doctype<O>), /// A HTML parsing error. /// /// Can be skipped over, the tokenizer is supposed to recover from the error and continues with |