summary refs log tree commit diff
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2023-09-03 10:47:44 +0200
committer Martin Fischer <martin@push-f.com> 2023-09-03 23:00:05 +0200
commit d56686deab81c8b50207b75a485cf26ec8502383 (patch)
tree 47f9466c6643a5851efb10f3422d056598341fab
parent 0576e5a9b93f28bd4d2adc224598de319e14f1be (diff)
break!: make Doctype name field optional
-rw-r--r-- CHANGELOG.md 2
-rw-r--r-- integration_tests/tests/test_html5lib.rs 2
-rw-r--r-- src/emitter.rs 22
-rw-r--r-- src/machine.rs 2
4 files changed, 24 insertions, 4 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7b0b5e7..0deb60e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -33,6 +33,8 @@
* `StartTag`/`EndTag`: Added `name_span` fields
(and removed the same-named methods).
+ * `Doctype`: The `name` field is now optional.
+
* `AttributeOwned`: The `name_offset` and `value_offset` fields have
been replaced with `name_span` and `value_span` respectively.
diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs
index f351f85..f13e580 100644
--- a/integration_tests/tests/test_html5lib.rs
+++ b/integration_tests/tests/test_html5lib.rs
@@ -146,7 +146,7 @@ fn run_test_inner<R: Reader>(
Token::String(data) => actual.tokens.push(TestToken::Character(data)),
Token::Comment(comment) => actual.tokens.push(TestToken::Comment(comment.data)),
Token::Doctype(doctype) => actual.tokens.push(TestToken::Doctype {
- name: Some(doctype.name).filter(|name| !name.is_empty()),
+ name: doctype.name,
public_id: doctype.public_id,
system_id: doctype.system_id,
force_quirks: doctype.force_quirks,
diff --git a/src/emitter.rs b/src/emitter.rs
index db3da78..69baec2 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -153,6 +153,11 @@ pub trait Emitter<O> {
/// If there is no current attribute, this method may panic.
fn push_attribute_value(&mut self, s: &str);
+ /// Assuming the _current token_ is a doctype, set its name to the empty string.
+ ///
+ /// If the current token is not a doctype, this method may panic.
+ fn init_doctype_name(&mut self, offset: O) {}
+
/// Assuming the _current token_ is a doctype, set its "public identifier" to the empty string.
///
/// If the current token is not a doctype, this method may panic.
@@ -411,15 +416,26 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
}
}
+ fn init_doctype_name(&mut self, offset: O) {
+ let Some(Token::Doctype(doctype)) = &mut self.current_token else {
+ debug_assert!(false);
+ return;
+ };
+ doctype.name = Some("".into());
+ }
+
fn push_doctype_name(&mut self, s: &str) {
match self.current_token {
- Some(Token::Doctype(ref mut doctype)) => doctype.name.push_str(s),
+ Some(Token::Doctype(Doctype {
+ name: Some(ref mut name),
+ ..
+ })) => name.push_str(s),
_ => debug_assert!(false),
}
}
fn init_doctype(&mut self, offset: O) {
self.current_token = Some(Token::Doctype(Doctype {
- name: String::new(),
+ name: None,
force_quirks: false,
public_id: None,
system_id: None,
@@ -601,7 +617,7 @@ pub struct Doctype<O> {
/// The doctype's name. Uppercase ASCII characters (A-Z) have been
/// converted to lowercase. For HTML documents this should be "html".
- pub name: String,
+ pub name: Option<String>,
/// The doctype's public identifier.
pub public_id: Option<String>,
diff --git a/src/machine.rs b/src/machine.rs
index 26e1652..6c4558c 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1208,6 +1208,7 @@ where
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.init_doctype();
+ slf.emitter.init_doctype_name(slf.position_before_match);
slf.emitter.push_doctype_name("\u{fffd}");
slf.state = State::DoctypeName;
Ok(ControlToken::Continue)
@@ -1229,6 +1230,7 @@ where
}
Some(x) => {
slf.init_doctype();
+ slf.emitter.init_doctype_name(slf.position_before_match);
slf.emitter
.push_doctype_name(ctostr!(x.to_ascii_lowercase()));
slf.state = State::DoctypeName;