diff options
Diffstat (limited to 'src/emitter.rs')
-rw-r--r-- | src/emitter.rs | 81 |
1 files changed, 42 insertions, 39 deletions
diff --git a/src/emitter.rs b/src/emitter.rs index 18b2539..b3fdb99 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -4,9 +4,11 @@ use std::collections::BTreeSet; use std::collections::VecDeque; use std::marker::PhantomData; use std::mem; +use std::ops::Range; -use crate::spans::Position; -use crate::spans::Span; +use crate::offset::NoopOffset; +use crate::offset::Offset; +use crate::offset::Position; use crate::Error; /// An emitter is an object providing methods to the tokenizer to produce ("emit") tokens. @@ -160,17 +162,17 @@ pub trait Emitter<R> { } /// The default implementation of [`Emitter`], used to produce tokens. -pub struct DefaultEmitter<R, S = ()> { +pub struct DefaultEmitter<R, O = NoopOffset> { current_characters: String, - current_token: Option<Token<S>>, - current_attribute: Option<(String, Attribute<S>)>, + current_token: Option<Token<O>>, + current_attribute: Option<(String, Attribute<O>)>, seen_attributes: BTreeSet<String>, - emitted_tokens: VecDeque<Token<S>>, + emitted_tokens: VecDeque<Token<O>>, reader: PhantomData<R>, - attr_in_end_tag_span: Option<S>, + attr_in_end_tag_span: Option<Range<O>>, } -impl<R, S> Default for DefaultEmitter<R, S> { +impl<R, O> Default for DefaultEmitter<R, O> { fn default() -> Self { DefaultEmitter { current_characters: String::new(), @@ -184,13 +186,16 @@ impl<R, S> Default for DefaultEmitter<R, S> { } } -impl<R, S: Span> DefaultEmitter<R, S> { - fn emit_token(&mut self, token: Token<S>) { +impl<R, O> DefaultEmitter<R, O> { + fn emit_token(&mut self, token: Token<O>) { self.flush_current_characters(); self.emitted_tokens.push_front(token); } - fn flush_current_attribute(&mut self) { + fn flush_current_attribute(&mut self) + where + O: Clone, + { if let Some((k, v)) = self.current_attribute.take() { match self.current_token { Some(Token::StartTag(ref mut tag)) => match tag.attributes.entry(k) { @@ -223,22 +228,22 @@ impl<R, S: Span> DefaultEmitter<R, S> { self.emit_token(Token::String(s)); } - fn push_error(&mut self, error: Error, span: S) { + fn push_error(&mut self, error: Error, span: Range<O>) { // bypass character flushing in self.emit_token: we don't need the error location to be // that exact self.emitted_tokens.push_front(Token::Error { error, span }); } } -impl<R: Position<S::Offset>, S: Span> Emitter<R> for DefaultEmitter<R, S> { - type Token = Token<S>; +impl<O: Offset, R: Position<O>> Emitter<R> for DefaultEmitter<R, O> { + type Token = Token<O>; fn emit_eof(&mut self) { self.flush_current_characters(); } fn emit_error(&mut self, error: Error, reader: &R) { - self.push_error(error, S::new(reader.position(), reader.position())); + self.push_error(error, reader.position()..reader.position()); } fn pop_token(&mut self) -> Option<Self::Token> { @@ -251,7 +256,7 @@ impl<R: Position<S::Offset>, S: Span> Emitter<R> for DefaultEmitter<R, S> { fn init_start_tag(&mut self, reader: &R) { self.current_token = Some(Token::StartTag(StartTag { - name_span: S::new(reader.position(), reader.position()), + name_span: reader.position()..reader.position(), self_closing: false, name: String::new(), attributes: Default::default(), @@ -259,7 +264,7 @@ impl<R: Position<S::Offset>, S: Span> Emitter<R> for DefaultEmitter<R, S> { } fn init_end_tag(&mut self, reader: &R) { self.current_token = Some(Token::EndTag(EndTag { - name_span: S::new(reader.position(), reader.position()), + name_span: reader.position()..reader.position(), name: String::new(), })); self.seen_attributes.clear(); @@ -327,7 +332,7 @@ impl<R: Position<S::Offset>, S: Span> Emitter<R> for DefaultEmitter<R, S> { .. })) => { name.push_str(s); - name_span.push_str(s); + name_span.end += s.len(); } Some(Token::EndTag(EndTag { ref mut name, @@ -335,7 +340,7 @@ impl<R: Position<S::Offset>, S: Span> Emitter<R> for DefaultEmitter<R, S> { .. })) => { name.push_str(s); - name_span.push_str(s); + name_span.end += s.len(); } _ => debug_assert!(false), } @@ -368,28 +373,26 @@ impl<R: Position<S::Offset>, S: Span> Emitter<R> for DefaultEmitter<R, S> { self.current_attribute = Some(( String::new(), Attribute { - name_span: S::new(reader.position(), reader.position()), + name_span: reader.position()..reader.position(), value: String::new(), - value_span: S::default(), + value_span: Range::default(), }, )); } fn init_attribute_value(&mut self, reader: &R, quoted: bool) { - self.current_attribute.as_mut().unwrap().1.value_span = S::new( - reader.position() + quoted as usize, - reader.position() + quoted as usize, - ); + self.current_attribute.as_mut().unwrap().1.value_span = + reader.position() + quoted as usize..reader.position() + quoted as usize; } fn push_attribute_name(&mut self, s: &str) { let current_attr = self.current_attribute.as_mut().unwrap(); current_attr.0.push_str(s); - current_attr.1.name_span.push_str(s); + current_attr.1.name_span.end += s.len(); } fn push_attribute_value(&mut self, s: &str) { let current_attr = self.current_attribute.as_mut().unwrap(); current_attr.1.value.push_str(s); - current_attr.1.value_span.push_str(s); + current_attr.1.value_span.end += s.len(); } fn set_doctype_public_identifier(&mut self, value: &str) { if let Some(Token::Doctype(Doctype { @@ -439,7 +442,7 @@ impl<R: Position<S::Offset>, S: Span> Emitter<R> for DefaultEmitter<R, S> { /// An HTML start tag, such as `<p>` or `<a>`. #[derive(Debug, Eq, PartialEq)] -pub struct StartTag<S> { +pub struct StartTag<O> { /// Whether this tag is self-closing. If it is self-closing, no following [`EndTag`] should be /// expected. pub self_closing: bool, @@ -451,33 +454,33 @@ pub struct StartTag<S> { /// /// Duplicate attributes are ignored after the first one as per WHATWG spec. Implement your own /// [`Emitter`] to tweak this behavior. - pub attributes: BTreeMap<String, Attribute<S>>, + pub attributes: BTreeMap<String, Attribute<O>>, /// The source code span of the tag name. - pub name_span: S, + pub name_span: Range<O>, } /// A HTML attribute value (plus spans). #[derive(Debug, Eq, PartialEq)] -pub struct Attribute<S> { +pub struct Attribute<O> { /// The value of the attribute. pub value: String, /// The source code span of the attribute name. - pub name_span: S, + pub name_span: Range<O>, /// The source code span of the attribute value. - pub value_span: S, + pub value_span: Range<O>, } /// A HTML end/close tag, such as `</p>` or `</a>`. #[derive(Debug, Eq, PartialEq)] -pub struct EndTag<S> { +pub struct EndTag<O> { /// The ending tag's name, such as `"p"` or `"a"`. pub name: String, /// The source code span of the tag name. - pub name_span: S, + pub name_span: Range<O>, } /// A doctype. Some examples: @@ -504,11 +507,11 @@ pub struct Doctype { /// The token type used by default. You can define your own token type by implementing the /// [`Emitter`] trait. #[derive(Debug, Eq, PartialEq)] -pub enum Token<S> { +pub enum Token<O> { /// A HTML start tag. - StartTag(StartTag<S>), + StartTag(StartTag<O>), /// A HTML end tag. - EndTag(EndTag<S>), + EndTag(EndTag<O>), /// A literal string. String(String), /// A HTML comment. @@ -523,6 +526,6 @@ pub enum Token<S> { /// What kind of error occured. error: Error, /// The source code span of the error. - span: S, + span: Range<O>, }, } |