diff options
| -rw-r--r-- | integration_tests/tests/test_html5lib.rs | 2 | ||||
| -rw-r--r-- | src/emitter.rs | 27 | ||||
| -rw-r--r-- | src/lib.rs | 2 | ||||
| -rw-r--r-- | tests/test_spans.rs | 33 | 
4 files changed, 58 insertions, 6 deletions
| diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs index 209e199..0f96063 100644 --- a/integration_tests/tests/test_html5lib.rs +++ b/integration_tests/tests/test_html5lib.rs @@ -135,7 +135,7 @@ fn run_test_inner<R: Reader>(              }),              Token::EndTag(tag) => actual.tokens.push(TestToken::EndTag { name: tag.name }),              Token::String(data) => actual.tokens.push(TestToken::Character(data)), -            Token::Comment(data) => actual.tokens.push(TestToken::Comment(data)), +            Token::Comment(comment) => actual.tokens.push(TestToken::Comment(comment.data)),              Token::Doctype(doctype) => actual.tokens.push(TestToken::Doctype {                  name: Some(doctype.name).filter(|name| !name.is_empty()),                  public_id: doctype.public_identifier, diff --git a/src/emitter.rs b/src/emitter.rs index b3fdb99..caf7b55 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -270,8 +270,11 @@ impl<O: Offset, R: Position<O>> Emitter<R> for DefaultEmitter<R, O> {          self.seen_attributes.clear();      } -    fn init_comment(&mut self, _reader: &R) { -        self.current_token = Some(Token::Comment(String::new())); +    fn init_comment(&mut self, reader: &R) { +        self.current_token = Some(Token::Comment(Comment { +            data: String::new(), +            data_offset: reader.position(), +        }));      }      fn emit_current_tag(&mut self) {          self.flush_current_attribute(); @@ -348,7 +351,7 @@ impl<O: Offset, R: Position<O>> Emitter<R> for DefaultEmitter<R, O> {      fn push_comment(&mut self, s: &str) {          match self.current_token { -            Some(Token::Comment(ref mut data)) => data.push_str(s), +            Some(Token::Comment(Comment { ref mut data, .. })) => data.push_str(s),              _ => debug_assert!(false),          }      } @@ -483,6 +486,22 @@ pub struct EndTag<O> {      pub name_span: Range<O>,  } +/// An HTML comment. +#[derive(PartialEq, Eq, Debug)] +pub struct Comment<O> { +    /// The text within the comment. +    pub data: String, +    /// The source offset of the comment data. +    pub data_offset: O, +} + +impl<O: Offset> Comment<O> { +    /// Calculates the span for the comment data and returns it. +    pub fn data_span(&self) -> Range<O> { +        self.data_offset..self.data_offset + self.data.len() +    } +} +  /// A doctype. Some examples:  ///  /// * `<!DOCTYPE {name}>` @@ -515,7 +534,7 @@ pub enum Token<O> {      /// A literal string.      String(String),      /// A HTML comment. -    Comment(String), +    Comment(Comment<O>),      /// A HTML doctype declaration.      Doctype(Doctype),      /// A HTML parsing error. @@ -15,6 +15,6 @@ mod utils;  #[cfg(feature = "integration-tests")]  pub use utils::State as InternalState; -pub use emitter::{Attribute, DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token}; +pub use emitter::{Attribute, Comment, DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token};  pub use error::Error;  pub use tokenizer::{State, Tokenizer}; diff --git a/tests/test_spans.rs b/tests/test_spans.rs index b41b1b9..21882a3 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -105,6 +105,39 @@ fn attribute_value_span() {      "###);  } +#[test] +fn comment_proper_data_span() { +    let html = "<!-- Why are you looking at the source code? -->"; +    let Token::Comment(comment) = tokenizer(html).next().unwrap() else { +        panic!("expected comment"); +    }; +    // FIXME: this span is wrong (starts one byte too soon) +    assert_eq!(comment.data, html[1..][comment.data_span()]); +    let labels = vec![(comment.data_span(), "")]; +    assert_snapshot!(annotate(html, labels), @r###" +    <!-- Why are you looking at the source code? --> +       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +    "###); +} + +#[test] +fn comment_bogus_data_span() { +    let html = "<! Why are you looking at the source code? -->"; +    let Token::Comment(comment) = tokenizer(html) +        .filter(|t| !matches!(t, Token::Error { .. })) +        .next() +        .unwrap() +    else { +        panic!("expected comment"); +    }; +    assert_eq!(comment.data, html[comment.data_span()]); +    let labels = vec![(comment.data_span(), "")]; +    assert_snapshot!(annotate(html, labels), @r###" +    <! Why are you looking at the source code? --> +      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +    "###); +} +  fn annotate_errors(html: &'static str) -> String {      let mut labels = Vec::new();      for token in tokenizer(html) { | 
