diff options
-rw-r--r-- | integration_tests/tests/test_html5lib.rs | 2 | ||||
-rw-r--r-- | src/emitter.rs | 27 | ||||
-rw-r--r-- | src/lib.rs | 2 | ||||
-rw-r--r-- | tests/test_spans.rs | 33 |
4 files changed, 58 insertions, 6 deletions
diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs index 209e199..0f96063 100644 --- a/integration_tests/tests/test_html5lib.rs +++ b/integration_tests/tests/test_html5lib.rs @@ -135,7 +135,7 @@ fn run_test_inner<R: Reader>( }), Token::EndTag(tag) => actual.tokens.push(TestToken::EndTag { name: tag.name }), Token::String(data) => actual.tokens.push(TestToken::Character(data)), - Token::Comment(data) => actual.tokens.push(TestToken::Comment(data)), + Token::Comment(comment) => actual.tokens.push(TestToken::Comment(comment.data)), Token::Doctype(doctype) => actual.tokens.push(TestToken::Doctype { name: Some(doctype.name).filter(|name| !name.is_empty()), public_id: doctype.public_identifier, diff --git a/src/emitter.rs b/src/emitter.rs index b3fdb99..caf7b55 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -270,8 +270,11 @@ impl<O: Offset, R: Position<O>> Emitter<R> for DefaultEmitter<R, O> { self.seen_attributes.clear(); } - fn init_comment(&mut self, _reader: &R) { - self.current_token = Some(Token::Comment(String::new())); + fn init_comment(&mut self, reader: &R) { + self.current_token = Some(Token::Comment(Comment { + data: String::new(), + data_offset: reader.position(), + })); } fn emit_current_tag(&mut self) { self.flush_current_attribute(); @@ -348,7 +351,7 @@ impl<O: Offset, R: Position<O>> Emitter<R> for DefaultEmitter<R, O> { fn push_comment(&mut self, s: &str) { match self.current_token { - Some(Token::Comment(ref mut data)) => data.push_str(s), + Some(Token::Comment(Comment { ref mut data, .. })) => data.push_str(s), _ => debug_assert!(false), } } @@ -483,6 +486,22 @@ pub struct EndTag<O> { pub name_span: Range<O>, } +/// An HTML comment. +#[derive(PartialEq, Eq, Debug)] +pub struct Comment<O> { + /// The text within the comment. + pub data: String, + /// The source offset of the comment data. + pub data_offset: O, +} + +impl<O: Offset> Comment<O> { + /// Calculates the span for the comment data and returns it. + pub fn data_span(&self) -> Range<O> { + self.data_offset..self.data_offset + self.data.len() + } +} + /// A doctype. Some examples: /// /// * `<!DOCTYPE {name}>` @@ -515,7 +534,7 @@ pub enum Token<O> { /// A literal string. String(String), /// A HTML comment. - Comment(String), + Comment(Comment<O>), /// A HTML doctype declaration. Doctype(Doctype), /// A HTML parsing error. @@ -15,6 +15,6 @@ mod utils; #[cfg(feature = "integration-tests")] pub use utils::State as InternalState; -pub use emitter::{Attribute, DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token}; +pub use emitter::{Attribute, Comment, DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token}; pub use error::Error; pub use tokenizer::{State, Tokenizer}; diff --git a/tests/test_spans.rs b/tests/test_spans.rs index b41b1b9..21882a3 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -105,6 +105,39 @@ fn attribute_value_span() { "###); } +#[test] +fn comment_proper_data_span() { + let html = "<!-- Why are you looking at the source code? -->"; + let Token::Comment(comment) = tokenizer(html).next().unwrap() else { + panic!("expected comment"); + }; + // FIXME: this span is wrong (starts one byte too soon) + assert_eq!(comment.data, html[1..][comment.data_span()]); + let labels = vec![(comment.data_span(), "")]; + assert_snapshot!(annotate(html, labels), @r###" + <!-- Why are you looking at the source code? --> + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + "###); +} + +#[test] +fn comment_bogus_data_span() { + let html = "<! Why are you looking at the source code? -->"; + let Token::Comment(comment) = tokenizer(html) + .filter(|t| !matches!(t, Token::Error { .. })) + .next() + .unwrap() + else { + panic!("expected comment"); + }; + assert_eq!(comment.data, html[comment.data_span()]); + let labels = vec![(comment.data_span(), "")]; + assert_snapshot!(annotate(html, labels), @r###" + <! Why are you looking at the source code? --> + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + "###); +} + fn annotate_errors(html: &'static str) -> String { let mut labels = Vec::new(); for token in tokenizer(html) { |