diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/emitter.rs | 83 | ||||
| -rw-r--r-- | src/spans.rs | 347 | 
2 files changed, 105 insertions, 325 deletions
| diff --git a/src/emitter.rs b/src/emitter.rs index fe98c43..e872b1f 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -5,6 +5,7 @@ use std::collections::VecDeque;  use std::marker::PhantomData;  use std::mem; +use crate::spans::Span;  use crate::Error;  use crate::State; @@ -189,6 +190,7 @@ pub struct DefaultEmitter<R, S> {      seen_attributes: BTreeSet<String>,      emitted_tokens: VecDeque<Token<S>>,      reader: PhantomData<R>, +    attr_in_end_tag_span: Option<S>,  }  impl<R, S> Default for DefaultEmitter<R, S> { @@ -201,12 +203,13 @@ impl<R, S> Default for DefaultEmitter<R, S> {              seen_attributes: BTreeSet::new(),              emitted_tokens: VecDeque::new(),              reader: PhantomData::default(), +            attr_in_end_tag_span: None,          }      }  } -impl<R> DefaultEmitter<R, ()> { -    fn emit_token(&mut self, token: Token<()>) { +impl<R, S: Span<R>> DefaultEmitter<R, S> { +    fn emit_token(&mut self, token: Token<S>) {          self.flush_current_characters();          self.emitted_tokens.push_front(token);      } @@ -219,12 +222,13 @@ impl<R> DefaultEmitter<R, ()> {                          vacant.insert(v);                      }                      Entry::Occupied(_) => { -                        self.push_error(Error::DuplicateAttribute); +                        self.push_error(Error::DuplicateAttribute, v.name_span);                      }                  },                  Some(Token::EndTag(_)) => { +                    self.attr_in_end_tag_span = Some(v.name_span.clone());                      if !self.seen_attributes.insert(k) { -                        self.push_error(Error::DuplicateAttribute); +                        self.push_error(Error::DuplicateAttribute, v.name_span);                      }                  }                  _ => { @@ -243,16 +247,15 @@ impl<R> DefaultEmitter<R, ()> {          self.emit_token(Token::String(s));      } -    fn push_error(&mut self, error: Error) { +    fn push_error(&mut self, error: Error, span: S) {          // bypass character flushing in self.emit_token: we don't need the error location to be          // that exact -        self.emitted_tokens -            .push_front(Token::Error { error, span: () }); +        self.emitted_tokens.push_front(Token::Error { error, span });      }  } -impl<R> Emitter<R> for DefaultEmitter<R, ()> { -    type Token = Token<()>; +impl<R, S: Span<R>> Emitter<R> for DefaultEmitter<R, S> { +    type Token = Token<S>;      fn set_last_start_tag(&mut self, last_start_tag: Option<&str>) {          self.last_start_tag.clear(); @@ -264,8 +267,8 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {          self.flush_current_characters();      } -    fn emit_error(&mut self, error: Error, _reader: &R) { -        self.push_error(error); +    fn emit_error(&mut self, error: Error, reader: &R) { +        self.push_error(error, S::from_reader(reader));      }      fn pop_token(&mut self) -> Option<Self::Token> { @@ -276,11 +279,17 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {          self.current_characters.push_str(s);      } -    fn init_start_tag(&mut self, _reader: &R) { -        self.current_token = Some(Token::StartTag(Default::default())); +    fn init_start_tag(&mut self, reader: &R) { +        self.current_token = Some(Token::StartTag(StartTag { +            name_span: S::from_reader(reader), +            ..Default::default() +        }));      } -    fn init_end_tag(&mut self, _reader: &R) { -        self.current_token = Some(Token::EndTag(Default::default())); +    fn init_end_tag(&mut self, reader: &R) { +        self.current_token = Some(Token::EndTag(EndTag { +            name_span: S::from_reader(reader), +            ..Default::default() +        }));          self.seen_attributes.clear();      } @@ -293,7 +302,8 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {          match token {              Token::EndTag(_) => {                  if !self.seen_attributes.is_empty() { -                    self.push_error(Error::EndTagWithAttributes); +                    let span = self.attr_in_end_tag_span.take().unwrap(); +                    self.push_error(Error::EndTagWithAttributes, span);                  }                  self.seen_attributes.clear();              } @@ -316,7 +326,7 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {          self.emit_token(doctype);      } -    fn set_self_closing(&mut self, _reader: &R) { +    fn set_self_closing(&mut self, reader: &R) {          let tag = self.current_token.as_mut().unwrap();          match tag {              Token::StartTag(StartTag { @@ -326,7 +336,7 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {                  *self_closing = true;              }              Token::EndTag(_) => { -                self.push_error(Error::EndTagWithTrailingSolidus); +                self.emit_error(Error::EndTagWithTrailingSolidus, reader);              }              _ => {                  debug_assert!(false); @@ -341,11 +351,21 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {      }      fn push_tag_name(&mut self, s: &str) {          match self.current_token { -            Some(Token::StartTag(StartTag { ref mut name, .. })) => { +            Some(Token::StartTag(StartTag { +                ref mut name, +                ref mut name_span, +                .. +            })) => {                  name.push_str(s); +                name_span.push_str(s);              } -            Some(Token::EndTag(EndTag { ref mut name, .. })) => { +            Some(Token::EndTag(EndTag { +                ref mut name, +                ref mut name_span, +                .. +            })) => {                  name.push_str(s); +                name_span.push_str(s);              }              _ => debug_assert!(false),          } @@ -373,15 +393,30 @@ impl<R> Emitter<R> for DefaultEmitter<R, ()> {          }));      } -    fn init_attribute_name(&mut self, _reader: &R) { +    fn init_attribute_name(&mut self, reader: &R) {          self.flush_current_attribute(); -        self.current_attribute = Some((String::new(), Attribute::default())); +        self.current_attribute = Some(( +            String::new(), +            Attribute { +                name_span: S::from_reader(reader), +                ..Default::default() +            }, +        )); +    } +    fn init_attribute_value(&mut self, reader: &R, quoted: bool) { +        self.current_attribute.as_mut().unwrap().1.value_span = +            S::from_reader_with_offset(reader, quoted as usize);      } +      fn push_attribute_name(&mut self, s: &str) { -        self.current_attribute.as_mut().unwrap().0.push_str(s); +        let current_attr = self.current_attribute.as_mut().unwrap(); +        current_attr.0.push_str(s); +        current_attr.1.name_span.push_str(s);      }      fn push_attribute_value(&mut self, s: &str) { -        self.current_attribute.as_mut().unwrap().1.value.push_str(s); +        let current_attr = self.current_attribute.as_mut().unwrap(); +        current_attr.1.value.push_str(s); +        current_attr.1.value_span.push_str(s);      }      fn set_doctype_public_identifier(&mut self, value: &str) {          if let Some(Token::Doctype(Doctype { diff --git a/src/spans.rs b/src/spans.rs index c582457..88d5eed 100644 --- a/src/spans.rs +++ b/src/spans.rs @@ -1,13 +1,18 @@  //! Source code spans. -use std::{ -    collections::{btree_map::Entry, BTreeSet, VecDeque}, -    marker::PhantomData, -    mem, -}; +//! +//! The [`DefaultEmitter`](crate::DefaultEmitter) is generic over a [`Span`]. +//! This library comes with two Span implementations: +//! +//! * one for `()` which acts as the no-op implementation for when you don't want to track spans +//! * one for [`Range<usize>`] for when you do want to track spans +//! +//! To use the latter your reader however has to implement [`GetPos`]. +//! You can easily use any existing reader by wrapping it in the [`PosTracker`] struct +//! which implements the [`GetPos`] trait and takes care of tracking the current position. -use crate::{Attribute, Doctype, Emitter, EndTag, Error, Reader, StartTag, Token}; +use std::ops::Range; -type Span = std::ops::Range<usize>; +use crate::Reader;  /// A trait to be implemented by readers that track their own position.  pub trait GetPos { @@ -29,320 +34,60 @@ impl<R> GetPos for PosTracker<R> {      }  } -impl<R: Reader> Reader for PosTracker<R> { -    type Error = R::Error; - -    fn read_char(&mut self) -> Result<Option<char>, Self::Error> { -        match self.reader.read_char()? { -            Some(char) => { -                self.position += char.len_utf8(); -                Ok(Some(char)) -            } -            None => Ok(None), -        } -    } +/// Represents a character range in the source code. +pub trait Span<R>: Default + Clone { +    /// Initializes a new span at the current position of the reader. +    fn from_reader(reader: &R) -> Self; -    fn try_read_string(&mut self, s: &str, case_sensitive: bool) -> Result<bool, Self::Error> { -        match self.reader.try_read_string(s, case_sensitive)? { -            true => { -                self.position += s.len(); -                Ok(true) -            } -            false => Ok(false), -        } -    } -} - -/// The default implementation of [`crate::Emitter`], used to produce ("emit") tokens. -pub struct SpanEmitter<R> { -    current_characters: String, -    current_token: Option<Token<Span>>, -    last_start_tag: String, -    current_attribute: Option<(String, Attribute<Span>)>, -    seen_attributes: BTreeSet<String>, -    emitted_tokens: VecDeque<Token<Span>>, -    reader: PhantomData<R>, -    attr_in_end_tag_span: Option<Span>, -} +    /// Initializes a new span at the current position of the reader with the given offset. +    fn from_reader_with_offset(reader: &R, offset: usize) -> Self; -impl<R> Default for SpanEmitter<R> { -    fn default() -> Self { -        SpanEmitter { -            current_characters: String::new(), -            current_token: None, -            last_start_tag: String::new(), -            current_attribute: None, -            seen_attributes: BTreeSet::new(), -            emitted_tokens: VecDeque::new(), -            reader: PhantomData::default(), -            attr_in_end_tag_span: None, -        } -    } +    /// Extends the span by the length of the given string. +    fn push_str(&mut self, str: &str);  } -impl<R: GetPos> SpanEmitter<R> { -    fn emit_token(&mut self, token: Token<Span>) { -        self.flush_current_characters(); -        self.emitted_tokens.push_front(token); -    } - -    fn flush_current_attribute(&mut self) { -        if let Some((k, v)) = self.current_attribute.take() { -            match self.current_token { -                Some(Token::StartTag(ref mut tag)) => match tag.attributes.entry(k) { -                    Entry::Vacant(vacant) => { -                        vacant.insert(v); -                    } -                    Entry::Occupied(_) => { -                        self.emit_error_span(Error::DuplicateAttribute, v.name_span); -                    } -                }, -                Some(Token::EndTag(_)) => { -                    self.attr_in_end_tag_span = Some(v.name_span.clone()); -                    if !self.seen_attributes.insert(k) { -                        self.emit_error_span(Error::DuplicateAttribute, v.name_span); -                    } -                } -                _ => { -                    debug_assert!(false); -                } -            } -        } -    } - -    fn flush_current_characters(&mut self) { -        if self.current_characters.is_empty() { -            return; -        } +impl<R> Span<R> for () { +    fn from_reader(_reader: &R) -> Self {} -        let s = mem::take(&mut self.current_characters); -        self.emit_token(Token::String(s)); -    } +    fn from_reader_with_offset(_reader: &R, _offset: usize) -> Self {} -    fn emit_error_span(&mut self, error: Error, span: Span) { -        // bypass character flushing in self.emit_token: we don't need the error location to be -        // that exact -        self.emitted_tokens.push_front(Token::Error { error, span }); -    } +    fn push_str(&mut self, _str: &str) {}  } -impl<R: GetPos> Emitter<R> for SpanEmitter<R> { -    type Token = Token<Span>; - -    fn set_last_start_tag(&mut self, last_start_tag: Option<&str>) { -        self.last_start_tag.clear(); -        self.last_start_tag -            .push_str(last_start_tag.unwrap_or_default()); +impl<P: GetPos> Span<P> for Range<usize> { +    fn from_reader(reader: &P) -> Self { +        reader.get_pos() - 1..reader.get_pos() - 1      } -    fn emit_eof(&mut self) { -        self.flush_current_characters(); +    fn from_reader_with_offset(reader: &P, offset: usize) -> Self { +        reader.get_pos() - 1 + offset..reader.get_pos() - 1 + offset      } -    fn emit_error(&mut self, error: Error, reader: &R) { -        self.emit_error_span(error, reader.get_pos() - 1..reader.get_pos() - 1) -    } - -    fn pop_token(&mut self) -> Option<Self::Token> { -        self.emitted_tokens.pop_back() -    } - -    fn emit_string(&mut self, s: &str) { -        self.current_characters.push_str(s); -    } - -    fn init_start_tag(&mut self, reader: &R) { -        self.current_token = Some(Token::StartTag(StartTag { -            name_span: reader.get_pos() - 1..reader.get_pos() - 1, -            ..Default::default() -        })); -    } -    fn init_end_tag(&mut self, reader: &R) { -        self.current_token = Some(Token::EndTag(EndTag { -            name_span: reader.get_pos() - 1..reader.get_pos() - 1, -            ..Default::default() -        })); -        self.seen_attributes.clear(); -    } - -    fn init_comment(&mut self, _reader: &R) { -        self.current_token = Some(Token::Comment(String::new())); -    } -    fn emit_current_tag(&mut self) { -        self.flush_current_attribute(); -        let mut token = self.current_token.take().unwrap(); -        match token { -            Token::EndTag(_) => { -                if !self.seen_attributes.is_empty() { -                    let span = self.attr_in_end_tag_span.take().unwrap(); -                    self.emit_error_span(Error::EndTagWithAttributes, span); -                } -                self.seen_attributes.clear(); -            } -            Token::StartTag(ref mut _tag) => { -                self.set_last_start_tag(Some(&_tag.name)); -            } -            _ => debug_assert!(false), -        } -        self.emit_token(token); -    } -    fn emit_current_comment(&mut self) { -        let comment = self.current_token.take().unwrap(); -        debug_assert!(matches!(comment, Token::Comment(_))); -        self.emit_token(comment); +    fn push_str(&mut self, str: &str) { +        self.end += str.len();      } +} -    fn emit_current_doctype(&mut self) { -        let doctype = self.current_token.take().unwrap(); -        debug_assert!(matches!(doctype, Token::Doctype(_))); -        self.emit_token(doctype); -    } +impl<R: Reader> Reader for PosTracker<R> { +    type Error = R::Error; -    fn set_self_closing(&mut self, reader: &R) { -        let tag = self.current_token.as_mut().unwrap(); -        match tag { -            Token::StartTag(StartTag { -                ref mut self_closing, -                .. -            }) => { -                *self_closing = true; -            } -            Token::EndTag(_) => { -                self.emit_error(Error::EndTagWithTrailingSolidus, reader); -            } -            _ => { -                debug_assert!(false); -            } -        } -    } -    fn set_force_quirks(&mut self) { -        match self.current_token { -            Some(Token::Doctype(ref mut doctype)) => doctype.force_quirks = true, -            _ => debug_assert!(false), -        } -    } -    fn push_tag_name(&mut self, s: &str) { -        match self.current_token { -            Some(Token::StartTag(StartTag { -                ref mut name, -                ref mut name_span, -                .. -            })) => { -                name.push_str(s); -                name_span.end += s.len(); -            } -            Some(Token::EndTag(EndTag { -                ref mut name, -                ref mut name_span, -                .. -            })) => { -                name.push_str(s); -                name_span.end += s.len(); +    fn read_char(&mut self) -> Result<Option<char>, Self::Error> { +        match self.reader.read_char()? { +            Some(char) => { +                self.position += char.len_utf8(); +                Ok(Some(char))              } -            _ => debug_assert!(false), -        } -    } - -    fn push_comment(&mut self, s: &str) { -        match self.current_token { -            Some(Token::Comment(ref mut data)) => data.push_str(s), -            _ => debug_assert!(false), -        } -    } - -    fn push_doctype_name(&mut self, s: &str) { -        match self.current_token { -            Some(Token::Doctype(ref mut doctype)) => doctype.name.push_str(s), -            _ => debug_assert!(false), -        } -    } -    fn init_doctype(&mut self, _reader: &R) { -        self.current_token = Some(Token::Doctype(Doctype { -            name: String::new(), -            force_quirks: false, -            public_identifier: None, -            system_identifier: None, -        })); -    } - -    fn init_attribute_name(&mut self, reader: &R) { -        self.flush_current_attribute(); -        self.current_attribute = Some(( -            String::new(), -            Attribute { -                name_span: reader.get_pos() - 1..reader.get_pos() - 1, -                ..Default::default() -            }, -        )); -    } - -    fn init_attribute_value(&mut self, reader: &R, quoted: bool) { -        let current_attr = self.current_attribute.as_mut().unwrap(); -        let offset = if quoted { 0 } else { 1 }; -        current_attr.1.value_span = reader.get_pos() - offset..reader.get_pos() - offset; -    } - -    fn push_attribute_name(&mut self, s: &str) { -        let current_attr = self.current_attribute.as_mut().unwrap(); -        current_attr.0.push_str(s); -        current_attr.1.name_span.end += s.len(); -    } -    fn push_attribute_value(&mut self, s: &str) { -        let current_attr = self.current_attribute.as_mut().unwrap(); -        current_attr.1.value.push_str(s); -        current_attr.1.value_span.end += s.len(); -    } -    fn set_doctype_public_identifier(&mut self, value: &str) { -        if let Some(Token::Doctype(Doctype { -            ref mut public_identifier, -            .. -        })) = self.current_token -        { -            *public_identifier = Some(value.to_owned()); -        } else { -            debug_assert!(false); -        } -    } -    fn set_doctype_system_identifier(&mut self, value: &str) { -        if let Some(Token::Doctype(Doctype { -            ref mut system_identifier, -            .. -        })) = self.current_token -        { -            *system_identifier = Some(value.to_owned()); -        } else { -            debug_assert!(false); -        } -    } -    fn push_doctype_public_identifier(&mut self, s: &str) { -        if let Some(Token::Doctype(Doctype { -            public_identifier: Some(ref mut id), -            .. -        })) = self.current_token -        { -            id.push_str(s); -        } else { -            debug_assert!(false); -        } -    } -    fn push_doctype_system_identifier(&mut self, s: &str) { -        if let Some(Token::Doctype(Doctype { -            system_identifier: Some(ref mut id), -            .. -        })) = self.current_token -        { -            id.push_str(s); -        } else { -            debug_assert!(false); +            None => Ok(None),          }      } -    fn current_is_appropriate_end_tag_token(&mut self) -> bool { -        match self.current_token { -            Some(Token::EndTag(ref tag)) => { -                !self.last_start_tag.is_empty() && self.last_start_tag == tag.name +    fn try_read_string(&mut self, s: &str, case_sensitive: bool) -> Result<bool, Self::Error> { +        match self.reader.try_read_string(s, case_sensitive)? { +            true => { +                self.position += s.len(); +                Ok(true)              } -            _ => false, +            false => Ok(false),          }      }  } | 
