diff options
author | Martin Fischer <martin@push-f.com> | 2021-11-30 18:32:30 +0100 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2021-12-05 02:52:36 +0100 |
commit | 76408590349f7f132c1dfeb9db3fb1dea964227c (patch) | |
tree | 2e88df0b737ec3225937630c758502400753f87b | |
parent | c29558e8d165df1d3fc1cb32079da9e40f957dc6 (diff) |
spans: support attribute names
-rw-r--r-- | src/emitter.rs | 21 | ||||
-rw-r--r-- | src/lib.rs | 2 | ||||
-rw-r--r-- | src/spans.rs | 22 | ||||
-rw-r--r-- | tests/span-tests/demo.html | 2 | ||||
-rw-r--r-- | tests/span-tests/demo.out | 12 | ||||
-rw-r--r-- | tests/test_html5lib.rs | 28 | ||||
-rw-r--r-- | tests/test_spans.rs | 17 |
7 files changed, 86 insertions, 18 deletions
diff --git a/src/emitter.rs b/src/emitter.rs index 20bcba4..d37c8f8 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -213,7 +213,11 @@ impl<R> DefaultEmitter<R, ()> { .and_modify(|_| { error = Some(Error::DuplicateAttribute); }) - .or_insert(v); + .or_insert(Attribute { + value: v, + name_span: (), + value_span: (), + }); if let Some(e) = error { self.emit_error(e); @@ -444,7 +448,7 @@ pub struct StartTag<S> { /// /// Duplicate attributes are ignored after the first one as per WHATWG spec. Implement your own /// [`Emitter`] to tweak this behavior. - pub attributes: BTreeMap<String, String>, + pub attributes: BTreeMap<String, Attribute<S>>, /// The source code span of the tag name. pub name_span: S, @@ -467,6 +471,19 @@ impl<S> StartTag<S> { } } +/// A HTML attribute value (plus spans). +#[derive(Debug, Default, Eq, PartialEq)] +pub struct Attribute<S> { + /// The value of the attribute. + pub value: String, + + /// The source code span of the attribute name. + pub name_span: S, + + /// The source code span of the attribute value. + pub value_span: S, +} + /// A HTML end/close tag, such as `</p>` or `</a>`. #[derive(Debug, Default, Eq, PartialEq)] pub struct EndTag<S> { @@ -16,7 +16,7 @@ mod utils; #[cfg(feature = "integration-tests")] pub use utils::State as InternalState; -pub use emitter::{DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token}; +pub use emitter::{Attribute, DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token}; pub use error::Error; pub use never::Never; pub use reader::{BufReadReader, Readable, Reader, StringReader}; diff --git a/src/spans.rs b/src/spans.rs index 72b30c0..300d659 100644 --- a/src/spans.rs +++ b/src/spans.rs @@ -5,7 +5,7 @@ use std::{ mem, }; -use crate::{Doctype, Emitter, EndTag, Error, Reader, StartTag, Token}; +use crate::{Attribute, Doctype, Emitter, EndTag, Error, Reader, StartTag, Token}; type Span = std::ops::Range<usize>; @@ -58,7 +58,7 @@ pub struct SpanEmitter<R> { current_characters: String, current_token: Option<Token<Span>>, last_start_tag: String, - current_attribute: Option<(String, String)>, + current_attribute: Option<(String, Attribute<Span>)>, seen_attributes: BTreeSet<String>, emitted_tokens: VecDeque<Token<Span>>, reader: PhantomData<R>, @@ -262,15 +262,25 @@ impl<R: GetPos> Emitter<R> for SpanEmitter<R> { })); } - fn init_attribute(&mut self, _reader: &R) { + fn init_attribute(&mut self, reader: &R) { self.flush_current_attribute(); - self.current_attribute = Some((String::new(), String::new())); + self.current_attribute = Some(( + String::new(), + Attribute { + name_span: reader.get_pos() - 1..reader.get_pos() - 1, + ..Default::default() + }, + )); } fn push_attribute_name(&mut self, s: &str) { - self.current_attribute.as_mut().unwrap().0.push_str(s); + let current_attr = self.current_attribute.as_mut().unwrap(); + current_attr.0.push_str(s); + current_attr.1.name_span.end += s.len(); } fn push_attribute_value(&mut self, s: &str) { - self.current_attribute.as_mut().unwrap().1.push_str(s); + let current_attr = self.current_attribute.as_mut().unwrap(); + current_attr.1.value.push_str(s); + current_attr.1.value_span.end += s.len(); } fn set_doctype_public_identifier(&mut self, value: &str) { if let Some(Token::Doctype(Doctype { diff --git a/tests/span-tests/demo.html b/tests/span-tests/demo.html index 900f74c..53bbab2 100644 --- a/tests/span-tests/demo.html +++ b/tests/span-tests/demo.html @@ -1 +1,3 @@ this is a tag: <h1>test</h1> + +tags can have attributes: <div id = foobar> diff --git a/tests/span-tests/demo.out b/tests/span-tests/demo.out index 98d6766..2b69ce5 100644 --- a/tests/span-tests/demo.out +++ b/tests/span-tests/demo.out @@ -1,7 +1,11 @@ note: - ┌─ test.html:1:17 + ┌─ test.html:1:1 │ 1 │ this is a tag: <h1>test</h1> - │ ^^ ^^ end tag - │ │ - │ start tag + │ ^^^^^^ ^^ ^^ end tag + │ │ │ + │ │ start tag + │ attr value +2 │ +3 │ tags can have attributes: <div id = foobar> + │ ^^ attr name diff --git a/tests/test_html5lib.rs b/tests/test_html5lib.rs index 5668217..662f3c5 100644 --- a/tests/test_html5lib.rs +++ b/tests/test_html5lib.rs @@ -1,5 +1,5 @@ use html5gum::{ - Doctype, EndTag, Error, InternalState as State, Reader, StartTag, Token, Tokenizer, + Attribute, Doctype, EndTag, Error, InternalState as State, Reader, StartTag, Token, Tokenizer, }; use pretty_assertions::assert_eq; use serde::{de::Error as _, Deserialize}; @@ -79,14 +79,36 @@ impl<'de> Deserialize<'de> for ExpectedOutputTokens { OutputToken::StartTag(_, name, attributes) => Token::StartTag(StartTag { self_closing: false, name, - attributes, + attributes: attributes + .into_iter() + .map(|(k, v)| { + ( + k, + Attribute { + value: v, + ..Default::default() + }, + ) + }) + .collect(), name_span: (), }), OutputToken::StartTag2(_, name, attributes, self_closing) => { Token::StartTag(StartTag { self_closing, name, - attributes, + attributes: attributes + .into_iter() + .map(|(k, v)| { + ( + k, + Attribute { + value: v, + ..Default::default() + }, + ) + }) + .collect(), name_span: (), }) } diff --git a/tests/test_spans.rs b/tests/test_spans.rs index a3d1c96..9cc745c 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -29,9 +29,22 @@ fn test() { .infallible() { if let Token::StartTag(tag) = token { - labels.push(Label::primary(file_id, tag.name_span).with_message("start tag")); + if tag.name == "h1" { + labels.push(Label::primary(file_id, tag.name_span).with_message("start tag")); + } else { + for attr in tag.attributes.values() { + labels.push( + Label::primary(file_id, attr.name_span.clone()).with_message("attr name"), + ); + labels.push( + Label::primary(file_id, attr.value_span.clone()).with_message("attr value"), + ); + } + } } else if let Token::EndTag(tag) = token { - labels.push(Label::primary(file_id, tag.name_span).with_message("end tag")); + if tag.name == "h1" { + labels.push(Label::primary(file_id, tag.name_span).with_message("end tag")); + } } } |