From 76408590349f7f132c1dfeb9db3fb1dea964227c Mon Sep 17 00:00:00 2001 From: Martin Fischer Date: Tue, 30 Nov 2021 18:32:30 +0100 Subject: spans: support attribute names --- src/emitter.rs | 21 +++++++++++++++++++-- src/lib.rs | 2 +- src/spans.rs | 22 ++++++++++++++++------ tests/span-tests/demo.html | 2 ++ tests/span-tests/demo.out | 12 ++++++++---- tests/test_html5lib.rs | 28 +++++++++++++++++++++++++--- tests/test_spans.rs | 17 +++++++++++++++-- 7 files changed, 86 insertions(+), 18 deletions(-) diff --git a/src/emitter.rs b/src/emitter.rs index 20bcba4..d37c8f8 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -213,7 +213,11 @@ impl DefaultEmitter { .and_modify(|_| { error = Some(Error::DuplicateAttribute); }) - .or_insert(v); + .or_insert(Attribute { + value: v, + name_span: (), + value_span: (), + }); if let Some(e) = error { self.emit_error(e); @@ -444,7 +448,7 @@ pub struct StartTag { /// /// Duplicate attributes are ignored after the first one as per WHATWG spec. Implement your own /// [`Emitter`] to tweak this behavior. - pub attributes: BTreeMap, + pub attributes: BTreeMap>, /// The source code span of the tag name. pub name_span: S, @@ -467,6 +471,19 @@ impl StartTag { } } +/// A HTML attribute value (plus spans). +#[derive(Debug, Default, Eq, PartialEq)] +pub struct Attribute { + /// The value of the attribute. + pub value: String, + + /// The source code span of the attribute name. + pub name_span: S, + + /// The source code span of the attribute value. + pub value_span: S, +} + /// A HTML end/close tag, such as `

` or ``. #[derive(Debug, Default, Eq, PartialEq)] pub struct EndTag { diff --git a/src/lib.rs b/src/lib.rs index 4b74d61..c601147 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,7 +16,7 @@ mod utils; #[cfg(feature = "integration-tests")] pub use utils::State as InternalState; -pub use emitter::{DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token}; +pub use emitter::{Attribute, DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token}; pub use error::Error; pub use never::Never; pub use reader::{BufReadReader, Readable, Reader, StringReader}; diff --git a/src/spans.rs b/src/spans.rs index 72b30c0..300d659 100644 --- a/src/spans.rs +++ b/src/spans.rs @@ -5,7 +5,7 @@ use std::{ mem, }; -use crate::{Doctype, Emitter, EndTag, Error, Reader, StartTag, Token}; +use crate::{Attribute, Doctype, Emitter, EndTag, Error, Reader, StartTag, Token}; type Span = std::ops::Range; @@ -58,7 +58,7 @@ pub struct SpanEmitter { current_characters: String, current_token: Option>, last_start_tag: String, - current_attribute: Option<(String, String)>, + current_attribute: Option<(String, Attribute)>, seen_attributes: BTreeSet, emitted_tokens: VecDeque>, reader: PhantomData, @@ -262,15 +262,25 @@ impl Emitter for SpanEmitter { })); } - fn init_attribute(&mut self, _reader: &R) { + fn init_attribute(&mut self, reader: &R) { self.flush_current_attribute(); - self.current_attribute = Some((String::new(), String::new())); + self.current_attribute = Some(( + String::new(), + Attribute { + name_span: reader.get_pos() - 1..reader.get_pos() - 1, + ..Default::default() + }, + )); } fn push_attribute_name(&mut self, s: &str) { - self.current_attribute.as_mut().unwrap().0.push_str(s); + let current_attr = self.current_attribute.as_mut().unwrap(); + current_attr.0.push_str(s); + current_attr.1.name_span.end += s.len(); } fn push_attribute_value(&mut self, s: &str) { - self.current_attribute.as_mut().unwrap().1.push_str(s); + let current_attr = self.current_attribute.as_mut().unwrap(); + current_attr.1.value.push_str(s); + current_attr.1.value_span.end += s.len(); } fn set_doctype_public_identifier(&mut self, value: &str) { if let Some(Token::Doctype(Doctype { diff --git a/tests/span-tests/demo.html b/tests/span-tests/demo.html index 900f74c..53bbab2 100644 --- a/tests/span-tests/demo.html +++ b/tests/span-tests/demo.html @@ -1 +1,3 @@ this is a tag:

test

+ +tags can have attributes:
diff --git a/tests/span-tests/demo.out b/tests/span-tests/demo.out index 98d6766..2b69ce5 100644 --- a/tests/span-tests/demo.out +++ b/tests/span-tests/demo.out @@ -1,7 +1,11 @@ note: - ┌─ test.html:1:17 + ┌─ test.html:1:1 │ 1 │ this is a tag:

test

- │ ^^ ^^ end tag - │ │ - │ start tag + │ ^^^^^^ ^^ ^^ end tag + │ │ │ + │ │ start tag + │ attr value +2 │ +3 │ tags can have attributes:
+ │ ^^ attr name diff --git a/tests/test_html5lib.rs b/tests/test_html5lib.rs index 5668217..662f3c5 100644 --- a/tests/test_html5lib.rs +++ b/tests/test_html5lib.rs @@ -1,5 +1,5 @@ use html5gum::{ - Doctype, EndTag, Error, InternalState as State, Reader, StartTag, Token, Tokenizer, + Attribute, Doctype, EndTag, Error, InternalState as State, Reader, StartTag, Token, Tokenizer, }; use pretty_assertions::assert_eq; use serde::{de::Error as _, Deserialize}; @@ -79,14 +79,36 @@ impl<'de> Deserialize<'de> for ExpectedOutputTokens { OutputToken::StartTag(_, name, attributes) => Token::StartTag(StartTag { self_closing: false, name, - attributes, + attributes: attributes + .into_iter() + .map(|(k, v)| { + ( + k, + Attribute { + value: v, + ..Default::default() + }, + ) + }) + .collect(), name_span: (), }), OutputToken::StartTag2(_, name, attributes, self_closing) => { Token::StartTag(StartTag { self_closing, name, - attributes, + attributes: attributes + .into_iter() + .map(|(k, v)| { + ( + k, + Attribute { + value: v, + ..Default::default() + }, + ) + }) + .collect(), name_span: (), }) } diff --git a/tests/test_spans.rs b/tests/test_spans.rs index a3d1c96..9cc745c 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -29,9 +29,22 @@ fn test() { .infallible() { if let Token::StartTag(tag) = token { - labels.push(Label::primary(file_id, tag.name_span).with_message("start tag")); + if tag.name == "h1" { + labels.push(Label::primary(file_id, tag.name_span).with_message("start tag")); + } else { + for attr in tag.attributes.values() { + labels.push( + Label::primary(file_id, attr.name_span.clone()).with_message("attr name"), + ); + labels.push( + Label::primary(file_id, attr.value_span.clone()).with_message("attr value"), + ); + } + } } else if let Token::EndTag(tag) = token { - labels.push(Label::primary(file_id, tag.name_span).with_message("end tag")); + if tag.name == "h1" { + labels.push(Label::primary(file_id, tag.name_span).with_message("end tag")); + } } } -- cgit v1.2.3