about summary refs log tree commit diff
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2021-11-30 18:32:30 +0100
committer Martin Fischer <martin@push-f.com> 2021-12-05 02:52:36 +0100
commit 76408590349f7f132c1dfeb9db3fb1dea964227c (patch)
tree 2e88df0b737ec3225937630c758502400753f87b
parent c29558e8d165df1d3fc1cb32079da9e40f957dc6 (diff)
spans: support attribute names
-rw-r--r-- src/emitter.rs 21
-rw-r--r-- src/lib.rs 2
-rw-r--r-- src/spans.rs 22
-rw-r--r-- tests/span-tests/demo.html 2
-rw-r--r-- tests/span-tests/demo.out 12
-rw-r--r-- tests/test_html5lib.rs 28
-rw-r--r-- tests/test_spans.rs 17
7 files changed, 86 insertions, 18 deletions
diff --git a/src/emitter.rs b/src/emitter.rs
index 20bcba4..d37c8f8 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -213,7 +213,11 @@ impl<R> DefaultEmitter<R, ()> {
.and_modify(|_| {
error = Some(Error::DuplicateAttribute);
})
- .or_insert(v);
+ .or_insert(Attribute {
+ value: v,
+ name_span: (),
+ value_span: (),
+ });
if let Some(e) = error {
self.emit_error(e);
@@ -444,7 +448,7 @@ pub struct StartTag<S> {
///
/// Duplicate attributes are ignored after the first one as per WHATWG spec. Implement your own
/// [`Emitter`] to tweak this behavior.
- pub attributes: BTreeMap<String, String>,
+ pub attributes: BTreeMap<String, Attribute<S>>,
/// The source code span of the tag name.
pub name_span: S,
@@ -467,6 +471,19 @@ impl<S> StartTag<S> {
}
}
+/// A HTML attribute value (plus spans).
+#[derive(Debug, Default, Eq, PartialEq)]
+pub struct Attribute<S> {
+ /// The value of the attribute.
+ pub value: String,
+
+ /// The source code span of the attribute name.
+ pub name_span: S,
+
+ /// The source code span of the attribute value.
+ pub value_span: S,
+}
+
/// A HTML end/close tag, such as `</p>` or `</a>`.
#[derive(Debug, Default, Eq, PartialEq)]
pub struct EndTag<S> {
diff --git a/src/lib.rs b/src/lib.rs
index 4b74d61..c601147 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -16,7 +16,7 @@ mod utils;
#[cfg(feature = "integration-tests")]
pub use utils::State as InternalState;
-pub use emitter::{DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token};
+pub use emitter::{Attribute, DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token};
pub use error::Error;
pub use never::Never;
pub use reader::{BufReadReader, Readable, Reader, StringReader};
diff --git a/src/spans.rs b/src/spans.rs
index 72b30c0..300d659 100644
--- a/src/spans.rs
+++ b/src/spans.rs
@@ -5,7 +5,7 @@ use std::{
mem,
};
-use crate::{Doctype, Emitter, EndTag, Error, Reader, StartTag, Token};
+use crate::{Attribute, Doctype, Emitter, EndTag, Error, Reader, StartTag, Token};
type Span = std::ops::Range<usize>;
@@ -58,7 +58,7 @@ pub struct SpanEmitter<R> {
current_characters: String,
current_token: Option<Token<Span>>,
last_start_tag: String,
- current_attribute: Option<(String, String)>,
+ current_attribute: Option<(String, Attribute<Span>)>,
seen_attributes: BTreeSet<String>,
emitted_tokens: VecDeque<Token<Span>>,
reader: PhantomData<R>,
@@ -262,15 +262,25 @@ impl<R: GetPos> Emitter<R> for SpanEmitter<R> {
}));
}
- fn init_attribute(&mut self, _reader: &R) {
+ fn init_attribute(&mut self, reader: &R) {
self.flush_current_attribute();
- self.current_attribute = Some((String::new(), String::new()));
+ self.current_attribute = Some((
+ String::new(),
+ Attribute {
+ name_span: reader.get_pos() - 1..reader.get_pos() - 1,
+ ..Default::default()
+ },
+ ));
}
fn push_attribute_name(&mut self, s: &str) {
- self.current_attribute.as_mut().unwrap().0.push_str(s);
+ let current_attr = self.current_attribute.as_mut().unwrap();
+ current_attr.0.push_str(s);
+ current_attr.1.name_span.end += s.len();
}
fn push_attribute_value(&mut self, s: &str) {
- self.current_attribute.as_mut().unwrap().1.push_str(s);
+ let current_attr = self.current_attribute.as_mut().unwrap();
+ current_attr.1.value.push_str(s);
+ current_attr.1.value_span.end += s.len();
}
fn set_doctype_public_identifier(&mut self, value: &str) {
if let Some(Token::Doctype(Doctype {
diff --git a/tests/span-tests/demo.html b/tests/span-tests/demo.html
index 900f74c..53bbab2 100644
--- a/tests/span-tests/demo.html
+++ b/tests/span-tests/demo.html
@@ -1 +1,3 @@
this is a tag: <h1>test</h1>
+
+tags can have attributes: <div id = foobar>
diff --git a/tests/span-tests/demo.out b/tests/span-tests/demo.out
index 98d6766..2b69ce5 100644
--- a/tests/span-tests/demo.out
+++ b/tests/span-tests/demo.out
@@ -1,7 +1,11 @@
note:
- ┌─ test.html:1:17
+ ┌─ test.html:1:1
1 │ this is a tag: <h1>test</h1>
- │ ^^ ^^ end tag
- │ │
- │ start tag
+ │ ^^^^^^ ^^ ^^ end tag
+ │ │ │
+ │ │ start tag
+ │ attr value
+2 │
+3 │ tags can have attributes: <div id = foobar>
+ │ ^^ attr name
diff --git a/tests/test_html5lib.rs b/tests/test_html5lib.rs
index 5668217..662f3c5 100644
--- a/tests/test_html5lib.rs
+++ b/tests/test_html5lib.rs
@@ -1,5 +1,5 @@
use html5gum::{
- Doctype, EndTag, Error, InternalState as State, Reader, StartTag, Token, Tokenizer,
+ Attribute, Doctype, EndTag, Error, InternalState as State, Reader, StartTag, Token, Tokenizer,
};
use pretty_assertions::assert_eq;
use serde::{de::Error as _, Deserialize};
@@ -79,14 +79,36 @@ impl<'de> Deserialize<'de> for ExpectedOutputTokens {
OutputToken::StartTag(_, name, attributes) => Token::StartTag(StartTag {
self_closing: false,
name,
- attributes,
+ attributes: attributes
+ .into_iter()
+ .map(|(k, v)| {
+ (
+ k,
+ Attribute {
+ value: v,
+ ..Default::default()
+ },
+ )
+ })
+ .collect(),
name_span: (),
}),
OutputToken::StartTag2(_, name, attributes, self_closing) => {
Token::StartTag(StartTag {
self_closing,
name,
- attributes,
+ attributes: attributes
+ .into_iter()
+ .map(|(k, v)| {
+ (
+ k,
+ Attribute {
+ value: v,
+ ..Default::default()
+ },
+ )
+ })
+ .collect(),
name_span: (),
})
}
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
index a3d1c96..9cc745c 100644
--- a/tests/test_spans.rs
+++ b/tests/test_spans.rs
@@ -29,9 +29,22 @@ fn test() {
.infallible()
{
if let Token::StartTag(tag) = token {
- labels.push(Label::primary(file_id, tag.name_span).with_message("start tag"));
+ if tag.name == "h1" {
+ labels.push(Label::primary(file_id, tag.name_span).with_message("start tag"));
+ } else {
+ for attr in tag.attributes.values() {
+ labels.push(
+ Label::primary(file_id, attr.name_span.clone()).with_message("attr name"),
+ );
+ labels.push(
+ Label::primary(file_id, attr.value_span.clone()).with_message("attr value"),
+ );
+ }
+ }
} else if let Token::EndTag(tag) = token {
- labels.push(Label::primary(file_id, tag.name_span).with_message("end tag"));
+ if tag.name == "h1" {
+ labels.push(Label::primary(file_id, tag.name_span).with_message("end tag"));
+ }
}
}