aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2023-08-17 15:21:32 +0200
committer Martin Fischer <martin@push-f.com> 2023-08-19 11:41:55 +0200
commit 4d9cf7171836625b61dcfe675bdf9452766166c0 (patch)
tree 90bc623a08b945439b6a5077bdfdf54de7eacc37
parent c169e78f120ea9be451f337306b8bff6c1fb4955 (diff)
feat!: add offset to comments
-rw-r--r-- integration_tests/tests/test_html5lib.rs 2
-rw-r--r-- src/emitter.rs 27
-rw-r--r-- src/lib.rs 2
-rw-r--r-- tests/test_spans.rs 33
4 files changed, 58 insertions, 6 deletions
diff --git a/integration_tests/tests/test_html5lib.rs b/integration_tests/tests/test_html5lib.rs
index 209e199..0f96063 100644
--- a/integration_tests/tests/test_html5lib.rs
+++ b/integration_tests/tests/test_html5lib.rs
@@ -135,7 +135,7 @@ fn run_test_inner<R: Reader>(
}),
Token::EndTag(tag) => actual.tokens.push(TestToken::EndTag { name: tag.name }),
Token::String(data) => actual.tokens.push(TestToken::Character(data)),
- Token::Comment(data) => actual.tokens.push(TestToken::Comment(data)),
+ Token::Comment(comment) => actual.tokens.push(TestToken::Comment(comment.data)),
Token::Doctype(doctype) => actual.tokens.push(TestToken::Doctype {
name: Some(doctype.name).filter(|name| !name.is_empty()),
public_id: doctype.public_identifier,
diff --git a/src/emitter.rs b/src/emitter.rs
index b3fdb99..caf7b55 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -270,8 +270,11 @@ impl<O: Offset, R: Position<O>> Emitter<R> for DefaultEmitter<R, O> {
self.seen_attributes.clear();
}
- fn init_comment(&mut self, _reader: &R) {
- self.current_token = Some(Token::Comment(String::new()));
+ fn init_comment(&mut self, reader: &R) {
+ self.current_token = Some(Token::Comment(Comment {
+ data: String::new(),
+ data_offset: reader.position(),
+ }));
}
fn emit_current_tag(&mut self) {
self.flush_current_attribute();
@@ -348,7 +351,7 @@ impl<O: Offset, R: Position<O>> Emitter<R> for DefaultEmitter<R, O> {
fn push_comment(&mut self, s: &str) {
match self.current_token {
- Some(Token::Comment(ref mut data)) => data.push_str(s),
+ Some(Token::Comment(Comment { ref mut data, .. })) => data.push_str(s),
_ => debug_assert!(false),
}
}
@@ -483,6 +486,22 @@ pub struct EndTag<O> {
pub name_span: Range<O>,
}
+/// An HTML comment.
+#[derive(PartialEq, Eq, Debug)]
+pub struct Comment<O> {
+ /// The text within the comment.
+ pub data: String,
+ /// The source offset of the comment data.
+ pub data_offset: O,
+}
+
+impl<O: Offset> Comment<O> {
+ /// Calculates the span for the comment data and returns it.
+ pub fn data_span(&self) -> Range<O> {
+ self.data_offset..self.data_offset + self.data.len()
+ }
+}
+
/// A doctype. Some examples:
///
/// * `<!DOCTYPE {name}>`
@@ -515,7 +534,7 @@ pub enum Token<O> {
/// A literal string.
String(String),
/// A HTML comment.
- Comment(String),
+ Comment(Comment<O>),
/// A HTML doctype declaration.
Doctype(Doctype),
/// A HTML parsing error.
diff --git a/src/lib.rs b/src/lib.rs
index fd0349c..1105141 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -15,6 +15,6 @@ mod utils;
#[cfg(feature = "integration-tests")]
pub use utils::State as InternalState;
-pub use emitter::{Attribute, DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token};
+pub use emitter::{Attribute, Comment, DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token};
pub use error::Error;
pub use tokenizer::{State, Tokenizer};
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
index b41b1b9..21882a3 100644
--- a/tests/test_spans.rs
+++ b/tests/test_spans.rs
@@ -105,6 +105,39 @@ fn attribute_value_span() {
"###);
}
+#[test]
+fn comment_proper_data_span() {
+ let html = "<!-- Why are you looking at the source code? -->";
+ let Token::Comment(comment) = tokenizer(html).next().unwrap() else {
+ panic!("expected comment");
+ };
+ // FIXME: this span is wrong (starts one byte too soon)
+ assert_eq!(comment.data, html[1..][comment.data_span()]);
+ let labels = vec![(comment.data_span(), "")];
+ assert_snapshot!(annotate(html, labels), @r###"
+ <!-- Why are you looking at the source code? -->
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ "###);
+}
+
+#[test]
+fn comment_bogus_data_span() {
+ let html = "<! Why are you looking at the source code? -->";
+ let Token::Comment(comment) = tokenizer(html)
+ .filter(|t| !matches!(t, Token::Error { .. }))
+ .next()
+ .unwrap()
+ else {
+ panic!("expected comment");
+ };
+ assert_eq!(comment.data, html[comment.data_span()]);
+ let labels = vec![(comment.data_span(), "")];
+ assert_snapshot!(annotate(html, labels), @r###"
+ <! Why are you looking at the source code? -->
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ "###);
+}
+
fn annotate_errors(html: &'static str) -> String {
let mut labels = Vec::new();
for token in tokenizer(html) {