aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/emitter.rs83
-rw-r--r--src/spans.rs347
-rw-r--r--tests/test_spans.rs9
3 files changed, 108 insertions, 331 deletions
diff --git a/src/emitter.rs b/src/emitter.rs
index fe98c43..e872b1f 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -5,6 +5,7 @@ use std::collections::VecDeque;
use std::marker::PhantomData;
use std::mem;
+use crate::spans::Span;
use crate::Error;
use crate::State;
@@ -189,6 +190,7 @@ pub struct DefaultEmitter<R, S> {
seen_attributes: BTreeSet<String>,
emitted_tokens: VecDeque<Token<S>>,
reader: PhantomData<R>,
+ attr_in_end_tag_span: Option<S>,
}
impl<R, S> Default for DefaultEmitter<R, S> {
@@ -201,12 +203,13 @@ impl<R, S> Default for DefaultEmitter<R, S> {
seen_attributes: BTreeSet::new(),
emitted_tokens: VecDeque::new(),
reader: PhantomData::default(),
+ attr_in_end_tag_span: None,
}
}
}
-impl<R> DefaultEmitter<R, ()> {
- fn emit_token(&mut self, token: Token<()>) {
+impl<R, S: Span<R>> DefaultEmitter<R, S> {
+ fn emit_token(&mut self, token: Token<S>) {
self.flush_current_characters();
self.emitted_tokens.push_front(token);
}
@@ -219,12 +222,13 @@ impl<R> DefaultEmitter<R, ()> {
vacant.insert(v);
}
Entry::Occupied(_) => {
- self.push_error(Error::DuplicateAttribute);
+ self.push_error(Error::DuplicateAttribute, v.name_span);
}
},
Some(Token::EndTag(_)) => {
+ self.attr_in_end_tag_span = Some(v.name_span.clone());
if !self.seen_attributes.insert(k) {
- self.push_error(Error::DuplicateAttribute);
+ self.push_error(Error::DuplicateAttribute, v.name_span);
}
}
_ => {
@@ -243,16 +247,15 @@ impl<R> DefaultEmitter<R, ()> {
self.emit_token(Token::String(s));
}
- fn push_error(&mut self, error: Error) {
+ fn push_error(&mut self, error: Error, span: S) {
// bypass character flushing in self.emit_token: we don't need the error location to be
// that exact
- self.emitted_tokens
- .push_front(Token::Error { error, span: () });
+ self.emitted_tokens.push_front(Token::Error { error, span });
}
}
-impl<R> Emitter<R> for DefaultEmitter<R, ()> {
- type Token = Token<()>;
+impl<R, S: Span<R>> Emitter<R> for DefaultEmitter<R, S> {
+ type Token = Token<S>;
fn set_last_start_tag(&mut self, last_start_tag: Option<&str>) {
self.last_start_tag.clear();
@@ -264,8 +267,8 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {
self.flush_current_characters();
}
- fn emit_error(&mut self, error: Error, _reader: &R) {
- self.push_error(error);
+ fn emit_error(&mut self, error: Error, reader: &R) {
+ self.push_error(error, S::from_reader(reader));
}
fn pop_token(&mut self) -> Option<Self::Token> {
@@ -276,11 +279,17 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {
self.current_characters.push_str(s);
}
- fn init_start_tag(&mut self, _reader: &R) {
- self.current_token = Some(Token::StartTag(Default::default()));
+ fn init_start_tag(&mut self, reader: &R) {
+ self.current_token = Some(Token::StartTag(StartTag {
+ name_span: S::from_reader(reader),
+ ..Default::default()
+ }));
}
- fn init_end_tag(&mut self, _reader: &R) {
- self.current_token = Some(Token::EndTag(Default::default()));
+ fn init_end_tag(&mut self, reader: &R) {
+ self.current_token = Some(Token::EndTag(EndTag {
+ name_span: S::from_reader(reader),
+ ..Default::default()
+ }));
self.seen_attributes.clear();
}
@@ -293,7 +302,8 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {
match token {
Token::EndTag(_) => {
if !self.seen_attributes.is_empty() {
- self.push_error(Error::EndTagWithAttributes);
+ let span = self.attr_in_end_tag_span.take().unwrap();
+ self.push_error(Error::EndTagWithAttributes, span);
}
self.seen_attributes.clear();
}
@@ -316,7 +326,7 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {
self.emit_token(doctype);
}
- fn set_self_closing(&mut self, _reader: &R) {
+ fn set_self_closing(&mut self, reader: &R) {
let tag = self.current_token.as_mut().unwrap();
match tag {
Token::StartTag(StartTag {
@@ -326,7 +336,7 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {
*self_closing = true;
}
Token::EndTag(_) => {
- self.push_error(Error::EndTagWithTrailingSolidus);
+ self.emit_error(Error::EndTagWithTrailingSolidus, reader);
}
_ => {
debug_assert!(false);
@@ -341,11 +351,21 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {
}
fn push_tag_name(&mut self, s: &str) {
match self.current_token {
- Some(Token::StartTag(StartTag { ref mut name, .. })) => {
+ Some(Token::StartTag(StartTag {
+ ref mut name,
+ ref mut name_span,
+ ..
+ })) => {
name.push_str(s);
+ name_span.push_str(s);
}
- Some(Token::EndTag(EndTag { ref mut name, .. })) => {
+ Some(Token::EndTag(EndTag {
+ ref mut name,
+ ref mut name_span,
+ ..
+ })) => {
name.push_str(s);
+ name_span.push_str(s);
}
_ => debug_assert!(false),
}
@@ -373,15 +393,30 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {
}));
}
- fn init_attribute_name(&mut self, _reader: &R) {
+ fn init_attribute_name(&mut self, reader: &R) {
self.flush_current_attribute();
- self.current_attribute = Some((String::new(), Attribute::default()));
+ self.current_attribute = Some((
+ String::new(),
+ Attribute {
+ name_span: S::from_reader(reader),
+ ..Default::default()
+ },
+ ));
+ }
+ fn init_attribute_value(&mut self, reader: &R, quoted: bool) {
+ self.current_attribute.as_mut().unwrap().1.value_span =
+ S::from_reader_with_offset(reader, quoted as usize);
}
+
fn push_attribute_name(&mut self, s: &str) {
- self.current_attribute.as_mut().unwrap().0.push_str(s);
+ let current_attr = self.current_attribute.as_mut().unwrap();
+ current_attr.0.push_str(s);
+ current_attr.1.name_span.push_str(s);
}
fn push_attribute_value(&mut self, s: &str) {
- self.current_attribute.as_mut().unwrap().1.value.push_str(s);
+ let current_attr = self.current_attribute.as_mut().unwrap();
+ current_attr.1.value.push_str(s);
+ current_attr.1.value_span.push_str(s);
}
fn set_doctype_public_identifier(&mut self, value: &str) {
if let Some(Token::Doctype(Doctype {
diff --git a/src/spans.rs b/src/spans.rs
index c582457..88d5eed 100644
--- a/src/spans.rs
+++ b/src/spans.rs
@@ -1,13 +1,18 @@
//! Source code spans.
-use std::{
- collections::{btree_map::Entry, BTreeSet, VecDeque},
- marker::PhantomData,
- mem,
-};
+//!
+//! The [`DefaultEmitter`](crate::DefaultEmitter) is generic over a [`Span`].
+//! This library comes with two Span implementations:
+//!
+//! * one for `()` which acts as the no-op implementation for when you don't want to track spans
+//! * one for [`Range<usize>`] for when you do want to track spans
+//!
+//! To use the latter, however, your reader has to implement [`GetPos`].
+//! You can easily use any existing reader by wrapping it in the [`PosTracker`] struct,
+//! which implements the [`GetPos`] trait and takes care of tracking the current position.
-use crate::{Attribute, Doctype, Emitter, EndTag, Error, Reader, StartTag, Token};
+use std::ops::Range;
-type Span = std::ops::Range<usize>;
+use crate::Reader;
/// A trait to be implemented by readers that track their own position.
pub trait GetPos {
@@ -29,320 +34,60 @@ impl<R> GetPos for PosTracker<R> {
}
}
-impl<R: Reader> Reader for PosTracker<R> {
- type Error = R::Error;
-
- fn read_char(&mut self) -> Result<Option<char>, Self::Error> {
- match self.reader.read_char()? {
- Some(char) => {
- self.position += char.len_utf8();
- Ok(Some(char))
- }
- None => Ok(None),
- }
- }
+/// Represents a range in the source code (byte offsets for the built-in `Range<usize>` implementation).
+pub trait Span<R>: Default + Clone {
+ /// Initializes a new span at the current position of the reader.
+ fn from_reader(reader: &R) -> Self;
- fn try_read_string(&mut self, s: &str, case_sensitive: bool) -> Result<bool, Self::Error> {
- match self.reader.try_read_string(s, case_sensitive)? {
- true => {
- self.position += s.len();
- Ok(true)
- }
- false => Ok(false),
- }
- }
-}
-
-/// The default implementation of [`crate::Emitter`], used to produce ("emit") tokens.
-pub struct SpanEmitter<R> {
- current_characters: String,
- current_token: Option<Token<Span>>,
- last_start_tag: String,
- current_attribute: Option<(String, Attribute<Span>)>,
- seen_attributes: BTreeSet<String>,
- emitted_tokens: VecDeque<Token<Span>>,
- reader: PhantomData<R>,
- attr_in_end_tag_span: Option<Span>,
-}
+ /// Initializes a new span at the current position of the reader with the given offset.
+ fn from_reader_with_offset(reader: &R, offset: usize) -> Self;
-impl<R> Default for SpanEmitter<R> {
- fn default() -> Self {
- SpanEmitter {
- current_characters: String::new(),
- current_token: None,
- last_start_tag: String::new(),
- current_attribute: None,
- seen_attributes: BTreeSet::new(),
- emitted_tokens: VecDeque::new(),
- reader: PhantomData::default(),
- attr_in_end_tag_span: None,
- }
- }
+ /// Extends the span by the length of the given string.
+ fn push_str(&mut self, str: &str);
}
-impl<R: GetPos> SpanEmitter<R> {
- fn emit_token(&mut self, token: Token<Span>) {
- self.flush_current_characters();
- self.emitted_tokens.push_front(token);
- }
-
- fn flush_current_attribute(&mut self) {
- if let Some((k, v)) = self.current_attribute.take() {
- match self.current_token {
- Some(Token::StartTag(ref mut tag)) => match tag.attributes.entry(k) {
- Entry::Vacant(vacant) => {
- vacant.insert(v);
- }
- Entry::Occupied(_) => {
- self.emit_error_span(Error::DuplicateAttribute, v.name_span);
- }
- },
- Some(Token::EndTag(_)) => {
- self.attr_in_end_tag_span = Some(v.name_span.clone());
- if !self.seen_attributes.insert(k) {
- self.emit_error_span(Error::DuplicateAttribute, v.name_span);
- }
- }
- _ => {
- debug_assert!(false);
- }
- }
- }
- }
-
- fn flush_current_characters(&mut self) {
- if self.current_characters.is_empty() {
- return;
- }
+impl<R> Span<R> for () {
+ fn from_reader(_reader: &R) -> Self {}
- let s = mem::take(&mut self.current_characters);
- self.emit_token(Token::String(s));
- }
+ fn from_reader_with_offset(_reader: &R, _offset: usize) -> Self {}
- fn emit_error_span(&mut self, error: Error, span: Span) {
- // bypass character flushing in self.emit_token: we don't need the error location to be
- // that exact
- self.emitted_tokens.push_front(Token::Error { error, span });
- }
+ fn push_str(&mut self, _str: &str) {}
}
-impl<R: GetPos> Emitter<R> for SpanEmitter<R> {
- type Token = Token<Span>;
-
- fn set_last_start_tag(&mut self, last_start_tag: Option<&str>) {
- self.last_start_tag.clear();
- self.last_start_tag
- .push_str(last_start_tag.unwrap_or_default());
+impl<P: GetPos> Span<P> for Range<usize> {
+ fn from_reader(reader: &P) -> Self {
+ reader.get_pos() - 1..reader.get_pos() - 1
}
- fn emit_eof(&mut self) {
- self.flush_current_characters();
+ fn from_reader_with_offset(reader: &P, offset: usize) -> Self {
+ reader.get_pos() - 1 + offset..reader.get_pos() - 1 + offset
}
- fn emit_error(&mut self, error: Error, reader: &R) {
- self.emit_error_span(error, reader.get_pos() - 1..reader.get_pos() - 1)
- }
-
- fn pop_token(&mut self) -> Option<Self::Token> {
- self.emitted_tokens.pop_back()
- }
-
- fn emit_string(&mut self, s: &str) {
- self.current_characters.push_str(s);
- }
-
- fn init_start_tag(&mut self, reader: &R) {
- self.current_token = Some(Token::StartTag(StartTag {
- name_span: reader.get_pos() - 1..reader.get_pos() - 1,
- ..Default::default()
- }));
- }
- fn init_end_tag(&mut self, reader: &R) {
- self.current_token = Some(Token::EndTag(EndTag {
- name_span: reader.get_pos() - 1..reader.get_pos() - 1,
- ..Default::default()
- }));
- self.seen_attributes.clear();
- }
-
- fn init_comment(&mut self, _reader: &R) {
- self.current_token = Some(Token::Comment(String::new()));
- }
- fn emit_current_tag(&mut self) {
- self.flush_current_attribute();
- let mut token = self.current_token.take().unwrap();
- match token {
- Token::EndTag(_) => {
- if !self.seen_attributes.is_empty() {
- let span = self.attr_in_end_tag_span.take().unwrap();
- self.emit_error_span(Error::EndTagWithAttributes, span);
- }
- self.seen_attributes.clear();
- }
- Token::StartTag(ref mut _tag) => {
- self.set_last_start_tag(Some(&_tag.name));
- }
- _ => debug_assert!(false),
- }
- self.emit_token(token);
- }
- fn emit_current_comment(&mut self) {
- let comment = self.current_token.take().unwrap();
- debug_assert!(matches!(comment, Token::Comment(_)));
- self.emit_token(comment);
+ fn push_str(&mut self, str: &str) {
+ self.end += str.len();
}
+}
- fn emit_current_doctype(&mut self) {
- let doctype = self.current_token.take().unwrap();
- debug_assert!(matches!(doctype, Token::Doctype(_)));
- self.emit_token(doctype);
- }
+impl<R: Reader> Reader for PosTracker<R> {
+ type Error = R::Error;
- fn set_self_closing(&mut self, reader: &R) {
- let tag = self.current_token.as_mut().unwrap();
- match tag {
- Token::StartTag(StartTag {
- ref mut self_closing,
- ..
- }) => {
- *self_closing = true;
- }
- Token::EndTag(_) => {
- self.emit_error(Error::EndTagWithTrailingSolidus, reader);
- }
- _ => {
- debug_assert!(false);
- }
- }
- }
- fn set_force_quirks(&mut self) {
- match self.current_token {
- Some(Token::Doctype(ref mut doctype)) => doctype.force_quirks = true,
- _ => debug_assert!(false),
- }
- }
- fn push_tag_name(&mut self, s: &str) {
- match self.current_token {
- Some(Token::StartTag(StartTag {
- ref mut name,
- ref mut name_span,
- ..
- })) => {
- name.push_str(s);
- name_span.end += s.len();
- }
- Some(Token::EndTag(EndTag {
- ref mut name,
- ref mut name_span,
- ..
- })) => {
- name.push_str(s);
- name_span.end += s.len();
+ fn read_char(&mut self) -> Result<Option<char>, Self::Error> {
+ match self.reader.read_char()? {
+ Some(char) => {
+ self.position += char.len_utf8();
+ Ok(Some(char))
}
- _ => debug_assert!(false),
- }
- }
-
- fn push_comment(&mut self, s: &str) {
- match self.current_token {
- Some(Token::Comment(ref mut data)) => data.push_str(s),
- _ => debug_assert!(false),
- }
- }
-
- fn push_doctype_name(&mut self, s: &str) {
- match self.current_token {
- Some(Token::Doctype(ref mut doctype)) => doctype.name.push_str(s),
- _ => debug_assert!(false),
- }
- }
- fn init_doctype(&mut self, _reader: &R) {
- self.current_token = Some(Token::Doctype(Doctype {
- name: String::new(),
- force_quirks: false,
- public_identifier: None,
- system_identifier: None,
- }));
- }
-
- fn init_attribute_name(&mut self, reader: &R) {
- self.flush_current_attribute();
- self.current_attribute = Some((
- String::new(),
- Attribute {
- name_span: reader.get_pos() - 1..reader.get_pos() - 1,
- ..Default::default()
- },
- ));
- }
-
- fn init_attribute_value(&mut self, reader: &R, quoted: bool) {
- let current_attr = self.current_attribute.as_mut().unwrap();
- let offset = if quoted { 0 } else { 1 };
- current_attr.1.value_span = reader.get_pos() - offset..reader.get_pos() - offset;
- }
-
- fn push_attribute_name(&mut self, s: &str) {
- let current_attr = self.current_attribute.as_mut().unwrap();
- current_attr.0.push_str(s);
- current_attr.1.name_span.end += s.len();
- }
- fn push_attribute_value(&mut self, s: &str) {
- let current_attr = self.current_attribute.as_mut().unwrap();
- current_attr.1.value.push_str(s);
- current_attr.1.value_span.end += s.len();
- }
- fn set_doctype_public_identifier(&mut self, value: &str) {
- if let Some(Token::Doctype(Doctype {
- ref mut public_identifier,
- ..
- })) = self.current_token
- {
- *public_identifier = Some(value.to_owned());
- } else {
- debug_assert!(false);
- }
- }
- fn set_doctype_system_identifier(&mut self, value: &str) {
- if let Some(Token::Doctype(Doctype {
- ref mut system_identifier,
- ..
- })) = self.current_token
- {
- *system_identifier = Some(value.to_owned());
- } else {
- debug_assert!(false);
- }
- }
- fn push_doctype_public_identifier(&mut self, s: &str) {
- if let Some(Token::Doctype(Doctype {
- public_identifier: Some(ref mut id),
- ..
- })) = self.current_token
- {
- id.push_str(s);
- } else {
- debug_assert!(false);
- }
- }
- fn push_doctype_system_identifier(&mut self, s: &str) {
- if let Some(Token::Doctype(Doctype {
- system_identifier: Some(ref mut id),
- ..
- })) = self.current_token
- {
- id.push_str(s);
- } else {
- debug_assert!(false);
+ None => Ok(None),
}
}
- fn current_is_appropriate_end_tag_token(&mut self) -> bool {
- match self.current_token {
- Some(Token::EndTag(ref tag)) => {
- !self.last_start_tag.is_empty() && self.last_start_tag == tag.name
+ fn try_read_string(&mut self, s: &str, case_sensitive: bool) -> Result<bool, Self::Error> {
+ match self.reader.try_read_string(s, case_sensitive)? {
+ true => {
+ self.position += s.len();
+ Ok(true)
}
- _ => false,
+ false => Ok(false),
}
}
}
diff --git a/tests/test_spans.rs b/tests/test_spans.rs
index aeb4a94..5b1e814 100644
--- a/tests/test_spans.rs
+++ b/tests/test_spans.rs
@@ -1,4 +1,4 @@
-use std::include_str;
+use std::{include_str, ops::Range};
use codespan_reporting::{
self,
@@ -6,10 +6,7 @@ use codespan_reporting::{
files::SimpleFiles,
term::{self, termcolor::Buffer},
};
-use html5gum::{
- spans::{PosTracker, SpanEmitter},
- Readable, Token, Tokenizer,
-};
+use html5gum::{spans::PosTracker, DefaultEmitter, Readable, StringReader, Token, Tokenizer};
#[test]
fn test() {
@@ -24,7 +21,7 @@ fn test() {
reader: html.to_reader(),
position: 0,
},
- SpanEmitter::default(),
+ DefaultEmitter::<PosTracker<StringReader>, Range<usize>>::default(),
)
.infallible()
{