aboutsummaryrefslogtreecommitdiff
path: root/src/emitter.rs
diff options
context:
space:
mode:
authorMartin Fischer <martin@push-f.com>2023-08-17 17:25:32 +0200
committerMartin Fischer <martin@push-f.com>2023-08-19 13:41:55 +0200
commit378662fa52bbc3e9e4a210f649093dcdadf51afa (patch)
tree44168be8588b8c17fc920839bc93595ea0152aea /src/emitter.rs
parente34083e64b764df076c1ef9ec6bf1102b9fbf748 (diff)
feat!: add span and offsets to Doctype
Diffstat (limited to 'src/emitter.rs')
-rw-r--r--src/emitter.rs51
1 files changed, 40 insertions, 11 deletions
diff --git a/src/emitter.rs b/src/emitter.rs
index f665f47..5b64acd 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -69,12 +69,12 @@ pub trait Emitter<O> {
/// Emit the _current token_, assuming it is a comment.
///
/// If the current token is not a comment, this method may panic.
- fn emit_current_comment(&mut self);
+ fn emit_current_comment(&mut self, offset: O);
/// Emit the _current token_, assuming it is a doctype.
///
/// If the current token is not a doctype, this method may panic.
- fn emit_current_doctype(&mut self);
+ fn emit_current_doctype(&mut self, offset: O);
/// Assuming the _current token_ is a start tag, set the self-closing flag.
///
@@ -140,12 +140,12 @@ pub trait Emitter<O> {
/// Assuming the _current token_ is a doctype, set its "public identifier" to the empty string.
///
/// If the current token is not a doctype, this method may panic.
- fn init_doctype_public_id(&mut self);
+ fn init_doctype_public_id(&mut self, offset: O);
/// Assuming the _current token_ is a doctype, set its "system identifier" to the empty string.
///
/// If the current token is not a doctype, this method may panic.
- fn init_doctype_system_id(&mut self);
+ fn init_doctype_system_id(&mut self, offset: O);
/// Assuming the _current token_ is a doctype, append the given string to its "public identifier".
///
@@ -308,17 +308,18 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
}
self.emit_token(token);
}
- fn emit_current_comment(&mut self) {
+ fn emit_current_comment(&mut self, _offset: O) {
let comment = self.current_token.take().unwrap();
debug_assert!(matches!(comment, Token::Comment(_)));
self.emit_token(comment);
}
- fn emit_current_doctype(&mut self) {
+ fn emit_current_doctype(&mut self, offset: O) {
let Some(Token::Doctype(mut doctype)) = self.current_token.take() else {
debug_assert!(false);
return;
};
+ doctype.span.end = offset;
self.emit_token(Token::Doctype(doctype));
}
@@ -370,12 +371,15 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
_ => debug_assert!(false),
}
}
- fn init_doctype(&mut self, _offset: O) {
+ fn init_doctype(&mut self, offset: O) {
self.current_token = Some(Token::Doctype(Doctype {
name: String::new(),
force_quirks: false,
public_id: None,
system_id: None,
+ span: offset..O::default(),
+ public_id_offset: O::default(),
+ system_id_offset: O::default(),
}));
}
@@ -405,19 +409,21 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
let current_attr = self.current_attribute.as_mut().unwrap();
current_attr.1.value.push_str(s);
}
- fn init_doctype_public_id(&mut self) {
+ fn init_doctype_public_id(&mut self, offset: O) {
let Some(Token::Doctype(doctype)) = &mut self.current_token else {
debug_assert!(false);
return;
};
doctype.public_id = Some("".to_owned());
+ doctype.public_id_offset = offset;
}
- fn init_doctype_system_id(&mut self) {
+ fn init_doctype_system_id(&mut self, offset: O) {
let Some(Token::Doctype(doctype)) = &mut self.current_token else {
debug_assert!(false);
return;
};
doctype.system_id = Some("".to_owned());
+ doctype.system_id_offset = offset;
}
fn push_doctype_public_id(&mut self, s: &str) {
if let Some(Token::Doctype(Doctype {
@@ -512,7 +518,7 @@ impl<O: Offset> Comment<O> {
/// * `<!DOCTYPE {name} SYSTEM '{system_id}'>`
/// * `<!DOCTYPE {name} PUBLIC '{public_id}' '{system_id}'>`
#[derive(Debug, Eq, PartialEq)]
-pub struct Doctype {
+pub struct Doctype<O> {
/// The ["force quirks"](https://html.spec.whatwg.org/#force-quirks-flag) flag.
pub force_quirks: bool,
@@ -524,6 +530,29 @@ pub struct Doctype {
/// The doctype's system identifier.
pub system_id: Option<String>,
+
+ /// The source code span of the doctype.
+ pub span: Range<O>,
+
+ /// The source offset of the public identifier.
+ public_id_offset: O,
+
+ /// The source offset of the system identifier.
+ system_id_offset: O,
+}
+
+impl<O: Offset> Doctype<O> {
+ /// Calculates the span of the public identifier and returns it.
+ pub fn public_id_span(&self) -> Option<Range<O>> {
+ let public_id = self.public_id.as_ref()?;
+ Some(self.public_id_offset..self.public_id_offset + public_id.len())
+ }
+
+ /// Calculates the span of the system identifier and returns it.
+ pub fn system_id_span(&self) -> Option<Range<O>> {
+ let system_id = self.system_id.as_ref()?;
+ Some(self.system_id_offset..self.system_id_offset + system_id.len())
+ }
}
/// The token type used by default. You can define your own token type by implementing the
@@ -539,7 +568,7 @@ pub enum Token<O> {
/// A HTML comment.
Comment(Comment<O>),
/// A HTML doctype declaration.
- Doctype(Doctype),
+ Doctype(Doctype<O>),
/// A HTML parsing error.
///
/// Can be skipped over, the tokenizer is supposed to recover from the error and continues with