aboutsummaryrefslogtreecommitdiff
path: root/src/emitter.rs
diff options
context:
space:
mode:
authorMartin Fischer <martin@push-f.com>2023-08-17 09:40:47 +0200
committerMartin Fischer <martin@push-f.com>2023-08-19 13:41:55 +0200
commitc15895d44d17984386d3684e2aa85aca386ba3bf (patch)
treea7c92e5eff97bd7645c7d309c8bf94ea891459ad /src/emitter.rs
parentd5c9a851756b1e84b022c2fbf984137aae68e2c9 (diff)
refactor!: make Emitter generic over offset instead of reader
Emitters should not have access to the reader at all. Also, the current position of the reader at the time an Emitter method is called very much depends on machine implementation details, such as whether `Tokenizer::unread_char` is used. Having the Emitter methods take offsets lets the machine take care of providing the right offsets, as evidenced by the next commit.
Diffstat (limited to 'src/emitter.rs')
-rw-r--r--src/emitter.rs60
1 files changed, 28 insertions, 32 deletions
diff --git a/src/emitter.rs b/src/emitter.rs
index caf7b55..1f60f70 100644
--- a/src/emitter.rs
+++ b/src/emitter.rs
@@ -2,13 +2,11 @@ use std::collections::btree_map::Entry;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::VecDeque;
-use std::marker::PhantomData;
use std::mem;
use std::ops::Range;
use crate::offset::NoopOffset;
use crate::offset::Offset;
-use crate::offset::Position;
use crate::Error;
/// An emitter is an object providing methods to the tokenizer to produce ("emit") tokens.
@@ -29,7 +27,7 @@ use crate::Error;
/// checks that would emit errors.
///
/// * If you don't care about attributes at all, you can make all related methods a noop.
-pub trait Emitter<R> {
+pub trait Emitter<O> {
/// The token type emitted by this emitter. This controls what type of values the [`Tokenizer`](crate::Tokenizer)
/// yields when used as an iterator.
type Token;
@@ -39,7 +37,7 @@ pub trait Emitter<R> {
fn emit_eof(&mut self);
/// A (probably recoverable) parsing error has occurred.
- fn emit_error(&mut self, error: Error, reader: &R);
+ fn emit_error(&mut self, error: Error, offset: O);
/// After every state change, the tokenizer calls this method to retrieve a new token that can
/// be returned via the tokenizer's iterator interface.
@@ -49,13 +47,13 @@ pub trait Emitter<R> {
fn emit_string(&mut self, c: &str);
/// Set the _current token_ to a start tag.
- fn init_start_tag(&mut self, reader: &R);
+ fn init_start_tag(&mut self, offset: O);
/// Set the _current token_ to an end tag.
- fn init_end_tag(&mut self, reader: &R);
+ fn init_end_tag(&mut self, offset: O);
/// Set the _current token_ to a comment.
- fn init_comment(&mut self, reader: &R);
+ fn init_comment(&mut self, data_offset: O);
/// Emit the _current token_, assuming it is a tag.
///
@@ -84,7 +82,7 @@ pub trait Emitter<R> {
///
/// If the current token is an end tag, the emitter should emit the
/// [`Error::EndTagWithTrailingSolidus`] error.
- fn set_self_closing(&mut self, reader: &R);
+ fn set_self_closing(&mut self, offset: O);
/// Assuming the _current token_ is a doctype, set its "force quirks" flag to true.
///
@@ -112,7 +110,7 @@ pub trait Emitter<R> {
/// * the "public identifier" should be null (different from empty)
/// * the "system identifier" should be null (different from empty)
/// * the "force quirks" flag should be `false`
- fn init_doctype(&mut self, reader: &R);
+ fn init_doctype(&mut self, offset: O);
/// Set the _current attribute_ to a new one, starting with empty name and value strings.
///
@@ -121,14 +119,14 @@ pub trait Emitter<R> {
/// [`Error::DuplicateAttribute`] error should be emitted.
///
/// If the current token is no tag at all, this method may panic.
- fn init_attribute_name(&mut self, reader: &R);
+ fn init_attribute_name(&mut self, offset: O);
/// Called before the first push_attribute_value call.
/// If the value is wrapped in double or single quotes, `quoted` is set to true, otherwise false.
///
/// If there is no current attribute, this method may panic.
#[allow(unused_variables)]
- fn init_attribute_value(&mut self, reader: &R, quoted: bool) {}
+ fn init_attribute_value(&mut self, offset: O, quoted: bool) {}
/// Append a string to the current attribute's name.
///
@@ -162,17 +160,16 @@ pub trait Emitter<R> {
}
/// The default implementation of [`Emitter`], used to produce tokens.
-pub struct DefaultEmitter<R, O = NoopOffset> {
+pub struct DefaultEmitter<O = NoopOffset> {
current_characters: String,
current_token: Option<Token<O>>,
current_attribute: Option<(String, Attribute<O>)>,
seen_attributes: BTreeSet<String>,
emitted_tokens: VecDeque<Token<O>>,
- reader: PhantomData<R>,
attr_in_end_tag_span: Option<Range<O>>,
}
-impl<R, O> Default for DefaultEmitter<R, O> {
+impl<O> Default for DefaultEmitter<O> {
fn default() -> Self {
DefaultEmitter {
current_characters: String::new(),
@@ -180,13 +177,12 @@ impl<R, O> Default for DefaultEmitter<R, O> {
current_attribute: None,
seen_attributes: BTreeSet::new(),
emitted_tokens: VecDeque::new(),
- reader: PhantomData::default(),
attr_in_end_tag_span: None,
}
}
}
-impl<R, O> DefaultEmitter<R, O> {
+impl<O> DefaultEmitter<O> {
fn emit_token(&mut self, token: Token<O>) {
self.flush_current_characters();
self.emitted_tokens.push_front(token);
@@ -235,15 +231,15 @@ impl<R, O> DefaultEmitter<R, O> {
}
}
-impl<O: Offset, R: Position<O>> Emitter<R> for DefaultEmitter<R, O> {
+impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
type Token = Token<O>;
fn emit_eof(&mut self) {
self.flush_current_characters();
}
- fn emit_error(&mut self, error: Error, reader: &R) {
- self.push_error(error, reader.position()..reader.position());
+ fn emit_error(&mut self, error: Error, offset: O) {
+ self.push_error(error, offset..offset);
}
fn pop_token(&mut self) -> Option<Self::Token> {
@@ -254,26 +250,26 @@ impl<O: Offset, R: Position<O>> Emitter<R> for DefaultEmitter<R, O> {
self.current_characters.push_str(s);
}
- fn init_start_tag(&mut self, reader: &R) {
+ fn init_start_tag(&mut self, offset: O) {
self.current_token = Some(Token::StartTag(StartTag {
- name_span: reader.position()..reader.position(),
+ name_span: offset..offset,
self_closing: false,
name: String::new(),
attributes: Default::default(),
}));
}
- fn init_end_tag(&mut self, reader: &R) {
+ fn init_end_tag(&mut self, offset: O) {
self.current_token = Some(Token::EndTag(EndTag {
- name_span: reader.position()..reader.position(),
+ name_span: offset..offset,
name: String::new(),
}));
self.seen_attributes.clear();
}
- fn init_comment(&mut self, reader: &R) {
+ fn init_comment(&mut self, data_offset: O) {
self.current_token = Some(Token::Comment(Comment {
data: String::new(),
- data_offset: reader.position(),
+ data_offset,
}));
}
fn emit_current_tag(&mut self) {
@@ -304,7 +300,7 @@ impl<O: Offset, R: Position<O>> Emitter<R> for DefaultEmitter<R, O> {
self.emit_token(doctype);
}
- fn set_self_closing(&mut self, reader: &R) {
+ fn set_self_closing(&mut self, offset: O) {
let tag = self.current_token.as_mut().unwrap();
match tag {
Token::StartTag(StartTag {
@@ -314,7 +310,7 @@ impl<O: Offset, R: Position<O>> Emitter<R> for DefaultEmitter<R, O> {
*self_closing = true;
}
Token::EndTag(_) => {
- self.emit_error(Error::EndTagWithTrailingSolidus, reader);
+ self.emit_error(Error::EndTagWithTrailingSolidus, offset);
}
_ => {
debug_assert!(false);
@@ -362,7 +358,7 @@ impl<O: Offset, R: Position<O>> Emitter<R> for DefaultEmitter<R, O> {
_ => debug_assert!(false),
}
}
- fn init_doctype(&mut self, _reader: &R) {
+ fn init_doctype(&mut self, _offset: O) {
self.current_token = Some(Token::Doctype(Doctype {
name: String::new(),
force_quirks: false,
@@ -371,20 +367,20 @@ impl<O: Offset, R: Position<O>> Emitter<R> for DefaultEmitter<R, O> {
}));
}
- fn init_attribute_name(&mut self, reader: &R) {
+ fn init_attribute_name(&mut self, offset: O) {
self.flush_current_attribute();
self.current_attribute = Some((
String::new(),
Attribute {
- name_span: reader.position()..reader.position(),
+ name_span: offset..offset,
value: String::new(),
value_span: Range::default(),
},
));
}
- fn init_attribute_value(&mut self, reader: &R, quoted: bool) {
+ fn init_attribute_value(&mut self, offset: O, quoted: bool) {
self.current_attribute.as_mut().unwrap().1.value_span =
- reader.position() + quoted as usize..reader.position() + quoted as usize;
+ offset + quoted as usize..offset + quoted as usize;
}
fn push_attribute_name(&mut self, s: &str) {