diff options
| -rw-r--r-- | CHANGELOG.md | 2 | ||||
| -rw-r--r-- | integration_tests/tests/test_html5lib.rs | 2 | ||||
| -rw-r--r-- | src/emitter.rs | 22 | ||||
| -rw-r--r-- | src/machine.rs | 2 | 
4 files changed, 24 insertions, 4 deletions
| diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b0b5e7..0deb60e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,8 @@    * `StartTag`/`EndTag`: Added `name_span` fields      (and removed the same-named methods). +  * `Doctype`: The `name` field is now optional. +    * `AttributeOwned`: The `name_offset` and `value_offset` fields have      been replaced with `name_span` and `value_span` respectively. diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs index f351f85..f13e580 100644 --- a/integration_tests/tests/test_html5lib.rs +++ b/integration_tests/tests/test_html5lib.rs @@ -146,7 +146,7 @@ fn run_test_inner<R: Reader>(              Token::String(data) => actual.tokens.push(TestToken::Character(data)),              Token::Comment(comment) => actual.tokens.push(TestToken::Comment(comment.data)),              Token::Doctype(doctype) => actual.tokens.push(TestToken::Doctype { -                name: Some(doctype.name).filter(|name| !name.is_empty()), +                name: doctype.name,                  public_id: doctype.public_id,                  system_id: doctype.system_id,                  force_quirks: doctype.force_quirks, diff --git a/src/emitter.rs b/src/emitter.rs index db3da78..69baec2 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -153,6 +153,11 @@ pub trait Emitter<O> {      /// If there is no current attribute, this method may panic.      fn push_attribute_value(&mut self, s: &str); +    /// Assuming the _current token_ is a doctype, set its name to the empty string. +    /// +    /// If the current token is not a doctype, this method may panic. +    fn init_doctype_name(&mut self, offset: O) {} +      /// Assuming the _current token_ is a doctype, set its "public identifier" to the empty string.      ///      /// If the current token is not a doctype, this method may panic. @@ -411,15 +416,26 @@ impl<O: Offset> Emitter<O> for DefaultEmitter<O> {          }      } +    fn init_doctype_name(&mut self, offset: O) { +        let Some(Token::Doctype(doctype)) = &mut self.current_token else { +            debug_assert!(false); +            return; +        }; +        doctype.name = Some("".into()); +    } +      fn push_doctype_name(&mut self, s: &str) {          match self.current_token { -            Some(Token::Doctype(ref mut doctype)) => doctype.name.push_str(s), +            Some(Token::Doctype(Doctype { +                name: Some(ref mut name), +                .. +            })) => name.push_str(s),              _ => debug_assert!(false),          }      }      fn init_doctype(&mut self, offset: O) {          self.current_token = Some(Token::Doctype(Doctype { -            name: String::new(), +            name: None,              force_quirks: false,              public_id: None,              system_id: None, @@ -601,7 +617,7 @@ pub struct Doctype<O> {      /// The doctype's name. Uppercase ASCII characters (A-Z) have been      /// converted to lowercase. For HTML documents this should be "html". -    pub name: String, +    pub name: Option<String>,      /// The doctype's public identifier.      pub public_id: Option<String>, diff --git a/src/machine.rs b/src/machine.rs index 26e1652..6c4558c 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -1208,6 +1208,7 @@ where              Some('\0') => {                  slf.emit_error(Error::UnexpectedNullCharacter);                  slf.init_doctype(); +                slf.emitter.init_doctype_name(slf.position_before_match);                  slf.emitter.push_doctype_name("\u{fffd}");                  slf.state = State::DoctypeName;                  Ok(ControlToken::Continue) @@ -1229,6 +1230,7 @@ where              }              Some(x) => {                  slf.init_doctype(); +                slf.emitter.init_doctype_name(slf.position_before_match);                  slf.emitter                      .push_doctype_name(ctostr!(x.to_ascii_lowercase()));                  slf.state = State::DoctypeName; | 
