aboutsummaryrefslogtreecommitdiff
path: root/src/machine.rs
diff options
context:
space:
mode:
authorMartin Fischer <martin@push-f.com>2023-08-17 09:40:47 +0200
committerMartin Fischer <martin@push-f.com>2023-08-19 13:41:55 +0200
commitc15895d44d17984386d3684e2aa85aca386ba3bf (patch)
treea7c92e5eff97bd7645c7d309c8bf94ea891459ad /src/machine.rs
parentd5c9a851756b1e84b022c2fbf984137aae68e2c9 (diff)
refactor!: make Emitter generic over offset instead of reader
Emitters should not have access to the reader at all. Also, the current position of the reader at the time an Emitter method is called very much depends on machine implementation details, such as whether `Tokenizer::unread_char` is used. Having the Emitter methods take offsets lets the machine take care of providing the right offsets, as evidenced by the next commit.
Diffstat (limited to 'src/machine.rs')
-rw-r--r--src/machine.rs45
1 files changed, 25 insertions, 20 deletions
diff --git a/src/machine.rs b/src/machine.rs
index c11720d..deb3983 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1,4 +1,5 @@
use crate::entities::try_read_character_reference;
+use crate::offset::{Offset, Position};
use crate::utils::{
ascii_digit_pat, control_pat, ctostr, noncharacter_pat, surrogate_pat, whitespace_pat,
ControlToken, State,
@@ -8,10 +9,11 @@ use crate::{reader::Reader, Emitter, Error, Tokenizer};
// Note: This is not implemented as a method on Tokenizer because there's fields on Tokenizer that
// should not be available in this method, such as Tokenizer.to_reconsume or the Reader instance
#[inline]
-pub fn consume<R, E>(slf: &mut Tokenizer<R, E>) -> Result<ControlToken, R::Error>
+pub fn consume<O, R, E>(slf: &mut Tokenizer<R, O, E>) -> Result<ControlToken, R::Error>
where
- R: Reader,
- E: Emitter<R>,
+ O: Offset,
+ R: Reader + Position<O>,
+ E: Emitter<O>,
{
macro_rules! mutate_character_reference {
(* $mul:literal + $x:ident - $sub:literal) => {
@@ -133,7 +135,7 @@ where
}
c @ Some('?') => {
slf.emit_error(Error::UnexpectedQuestionMarkInsteadOfTagName);
- slf.emitter.init_comment(&slf.reader);
+ slf.emitter.init_comment(slf.reader.position());
slf.state = State::BogusComment;
slf.unread_char(c);
Ok(ControlToken::Continue)
@@ -170,7 +172,7 @@ where
}
Some(x) => {
slf.emit_error(Error::InvalidFirstCharacterOfTagName);
- slf.emitter.init_comment(&slf.reader);
+ slf.emitter.init_comment(slf.reader.position());
slf.state = State::BogusComment;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
@@ -686,13 +688,13 @@ where
}
Some('=') => {
slf.emit_error(Error::UnexpectedEqualsSignBeforeAttributeName);
- slf.emitter.init_attribute_name(&slf.reader);
+ slf.emitter.init_attribute_name(slf.reader.position());
slf.emitter.push_attribute_name("=");
slf.state = State::AttributeName;
Ok(ControlToken::Continue)
}
Some(x) => {
- slf.emitter.init_attribute_name(&slf.reader);
+ slf.emitter.init_attribute_name(slf.reader.position());
slf.state = State::AttributeName;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
@@ -745,7 +747,7 @@ where
Ok(ControlToken::Eof)
}
Some(x) => {
- slf.emitter.init_attribute_name(&slf.reader);
+ slf.emitter.init_attribute_name(slf.reader.position());
slf.state = State::AttributeName;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
@@ -754,12 +756,14 @@ where
State::BeforeAttributeValue => match slf.read_char()? {
Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('"') => {
- slf.emitter.init_attribute_value(&slf.reader, true);
+ slf.emitter
+ .init_attribute_value(slf.reader.position(), true);
slf.state = State::AttributeValueDoubleQuoted;
Ok(ControlToken::Continue)
}
Some('\'') => {
- slf.emitter.init_attribute_value(&slf.reader, true);
+ slf.emitter
+ .init_attribute_value(slf.reader.position(), true);
slf.state = State::AttributeValueSingleQuoted;
Ok(ControlToken::Continue)
}
@@ -770,7 +774,8 @@ where
Ok(ControlToken::Continue)
}
c => {
- slf.emitter.init_attribute_value(&slf.reader, false);
+ slf.emitter
+ .init_attribute_value(slf.reader.position(), false);
slf.state = State::AttributeValueUnquoted;
slf.unread_char(c);
Ok(ControlToken::Continue)
@@ -885,7 +890,7 @@ where
},
State::SelfClosingStartTag => match slf.read_char()? {
Some('>') => {
- slf.emitter.set_self_closing(&slf.reader);
+ slf.emitter.set_self_closing(slf.reader.position());
slf.state = State::Data;
slf.emit_current_tag();
Ok(ControlToken::Continue)
@@ -923,7 +928,7 @@ where
},
State::MarkupDeclarationOpen => match slf.read_char()? {
Some('-') if slf.try_read_string("-", true)? => {
- slf.emitter.init_comment(&slf.reader);
+ slf.emitter.init_comment(slf.reader.position());
slf.state = State::CommentStart;
Ok(ControlToken::Continue)
}
@@ -940,14 +945,14 @@ where
// let's hope that bogus comment can just sort of skip over cdata
slf.emit_error(Error::CdataInHtmlContent);
- slf.emitter.init_comment(&slf.reader);
+ slf.emitter.init_comment(slf.reader.position());
slf.emitter.push_comment("[CDATA[");
slf.state = State::BogusComment;
Ok(ControlToken::Continue)
}
c => {
slf.emit_error(Error::IncorrectlyOpenedComment);
- slf.emitter.init_comment(&slf.reader);
+ slf.emitter.init_comment(slf.reader.position());
slf.state = State::BogusComment;
slf.unread_char(c);
Ok(ControlToken::Continue)
@@ -1153,7 +1158,7 @@ where
}
None => {
slf.emit_error(Error::EofInDoctype);
- slf.emitter.init_doctype(&slf.reader);
+ slf.emitter.init_doctype(slf.reader.position());
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
@@ -1169,14 +1174,14 @@ where
Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
- slf.emitter.init_doctype(&slf.reader);
+ slf.emitter.init_doctype(slf.reader.position());
slf.emitter.push_doctype_name("\u{fffd}");
slf.state = State::DoctypeName;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emit_error(Error::MissingDoctypeName);
- slf.emitter.init_doctype(&slf.reader);
+ slf.emitter.init_doctype(slf.reader.position());
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype();
@@ -1184,13 +1189,13 @@ where
}
None => {
slf.emit_error(Error::EofInDoctype);
- slf.emitter.init_doctype(&slf.reader);
+ slf.emitter.init_doctype(slf.reader.position());
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype();
Ok(ControlToken::Eof)
}
Some(x) => {
- slf.emitter.init_doctype(&slf.reader);
+ slf.emitter.init_doctype(slf.reader.position());
slf.emitter
.push_doctype_name(ctostr!(x.to_ascii_lowercase()));
slf.state = State::DoctypeName;