diff options
-rw-r--r-- | CHANGELOG.md | 2 | ||||
-rw-r--r-- | integration_tests/tests/test_html5lib.rs | 2 | ||||
-rw-r--r-- | src/emitter.rs | 22 | ||||
-rw-r--r-- | src/machine.rs | 2 |
4 files changed, 24 insertions, 4 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7b0b5e7..0deb60e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -33,6 +33,8 @@
   * `StartTag`/`EndTag`: Added `name_span` fields
     (and removed the same-named methods).
 
+  * `Doctype`: The `name` field is now optional.
+
   * `AttributeOwned`: The `name_offset` and `value_offset` fields
     have been replaced with `name_span` and `value_span` respectively.
 
diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs
index f351f85..f13e580 100644
--- a/integration_tests/tests/test_html5lib.rs
+++ b/integration_tests/tests/test_html5lib.rs
@@ -146,7 +146,7 @@ fn run_test_inner<R: Reader>(
             Token::String(data) => actual.tokens.push(TestToken::Character(data)),
             Token::Comment(comment) => actual.tokens.push(TestToken::Comment(comment.data)),
             Token::Doctype(doctype) => actual.tokens.push(TestToken::Doctype {
-                name: Some(doctype.name).filter(|name| !name.is_empty()),
+                name: doctype.name,
                 public_id: doctype.public_id,
                 system_id: doctype.system_id,
                 force_quirks: doctype.force_quirks,
diff --git a/src/emitter.rs b/src/emitter.rs
index db3da78..69baec2 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -153,6 +153,11 @@ pub trait Emitter<O> {
     /// If there is no current attribute, this method may panic.
     fn push_attribute_value(&mut self, s: &str);
 
+    /// Assuming the _current token_ is a doctype, set its name to the empty string.
+    ///
+    /// If the current token is not a doctype, this method may panic.
+    fn init_doctype_name(&mut self, offset: O) {}
+
     /// Assuming the _current token_ is a doctype, set its "public identifier" to the empty string.
     ///
     /// If the current token is not a doctype, this method may panic.
@@ -411,15 +416,26 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
         }
     }
 
+    fn init_doctype_name(&mut self, offset: O) {
+        let Some(Token::Doctype(doctype)) = &mut self.current_token else {
+            debug_assert!(false);
+            return;
+        };
+        doctype.name = Some("".into());
+    }
+
     fn push_doctype_name(&mut self, s: &str) {
         match self.current_token {
-            Some(Token::Doctype(ref mut doctype)) => doctype.name.push_str(s),
+            Some(Token::Doctype(Doctype {
+                name: Some(ref mut name),
+                ..
+            })) => name.push_str(s),
             _ => debug_assert!(false),
         }
     }
     fn init_doctype(&mut self, offset: O) {
         self.current_token = Some(Token::Doctype(Doctype {
-            name: String::new(),
+            name: None,
             force_quirks: false,
             public_id: None,
             system_id: None,
@@ -601,7 +617,7 @@ pub struct Doctype<O> {
 
     /// The doctype's name. Uppercase ASCII characters (A-Z) have been
     /// converted to lowercase. For HTML documents this should be "html".
-    pub name: String,
+    pub name: Option<String>,
 
     /// The doctype's public identifier.
     pub public_id: Option<String>,
diff --git a/src/machine.rs b/src/machine.rs
index 26e1652..6c4558c 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1208,6 +1208,7 @@ where
             Some('\0') => {
                 slf.emit_error(Error::UnexpectedNullCharacter);
                 slf.init_doctype();
+                slf.emitter.init_doctype_name(slf.position_before_match);
                 slf.emitter.push_doctype_name("\u{fffd}");
                 slf.state = State::DoctypeName;
                 Ok(ControlToken::Continue)
@@ -1229,6 +1230,7 @@ where
             }
             Some(x) => {
                 slf.init_doctype();
+                slf.emitter.init_doctype_name(slf.position_before_match);
                 slf.emitter
                     .push_doctype_name(ctostr!(x.to_ascii_lowercase()));
                 slf.state = State::DoctypeName;