diff options
author | Martin Fischer <martin@push-f.com> | 2023-08-17 09:40:47 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-08-19 13:41:55 +0200 |
commit | c15895d44d17984386d3684e2aa85aca386ba3bf (patch) | |
tree | a7c92e5eff97bd7645c7d309c8bf94ea891459ad /src/machine.rs | |
parent | d5c9a851756b1e84b022c2fbf984137aae68e2c9 (diff) |
refactor!: make Emitter generic over offset instead of reader
Emitters should not have access to the reader at all. Also, the
current position of the reader at the time an Emitter method is
called very much depends on machine implementation details, such
as whether `Tokenizer::unread_char` is used. Having the Emitter
methods take offsets lets the machine take care of providing
the right offsets, as evidenced by the next commit.
Diffstat (limited to 'src/machine.rs')
-rw-r--r-- | src/machine.rs | 45 |
1 file changed, 25 insertions, 20 deletions
diff --git a/src/machine.rs b/src/machine.rs index c11720d..deb3983 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -1,4 +1,5 @@ use crate::entities::try_read_character_reference; +use crate::offset::{Offset, Position}; use crate::utils::{ ascii_digit_pat, control_pat, ctostr, noncharacter_pat, surrogate_pat, whitespace_pat, ControlToken, State, @@ -8,10 +9,11 @@ use crate::{reader::Reader, Emitter, Error, Tokenizer}; // Note: This is not implemented as a method on Tokenizer because there's fields on Tokenizer that // should not be available in this method, such as Tokenizer.to_reconsume or the Reader instance #[inline] -pub fn consume<R, E>(slf: &mut Tokenizer<R, E>) -> Result<ControlToken, R::Error> +pub fn consume<O, R, E>(slf: &mut Tokenizer<R, O, E>) -> Result<ControlToken, R::Error> where - R: Reader, - E: Emitter<R>, + O: Offset, + R: Reader + Position<O>, + E: Emitter<O>, { macro_rules! mutate_character_reference { (* $mul:literal + $x:ident - $sub:literal) => { @@ -133,7 +135,7 @@ where } c @ Some('?') => { slf.emit_error(Error::UnexpectedQuestionMarkInsteadOfTagName); - slf.emitter.init_comment(&slf.reader); + slf.emitter.init_comment(slf.reader.position()); slf.state = State::BogusComment; slf.unread_char(c); Ok(ControlToken::Continue) @@ -170,7 +172,7 @@ where } Some(x) => { slf.emit_error(Error::InvalidFirstCharacterOfTagName); - slf.emitter.init_comment(&slf.reader); + slf.emitter.init_comment(slf.reader.position()); slf.state = State::BogusComment; slf.unread_char(Some(x)); Ok(ControlToken::Continue) @@ -686,13 +688,13 @@ where } Some('=') => { slf.emit_error(Error::UnexpectedEqualsSignBeforeAttributeName); - slf.emitter.init_attribute_name(&slf.reader); + slf.emitter.init_attribute_name(slf.reader.position()); slf.emitter.push_attribute_name("="); slf.state = State::AttributeName; Ok(ControlToken::Continue) } Some(x) => { - slf.emitter.init_attribute_name(&slf.reader); + slf.emitter.init_attribute_name(slf.reader.position()); slf.state = 
State::AttributeName; slf.unread_char(Some(x)); Ok(ControlToken::Continue) @@ -745,7 +747,7 @@ where Ok(ControlToken::Eof) } Some(x) => { - slf.emitter.init_attribute_name(&slf.reader); + slf.emitter.init_attribute_name(slf.reader.position()); slf.state = State::AttributeName; slf.unread_char(Some(x)); Ok(ControlToken::Continue) @@ -754,12 +756,14 @@ where State::BeforeAttributeValue => match slf.read_char()? { Some(whitespace_pat!()) => Ok(ControlToken::Continue), Some('"') => { - slf.emitter.init_attribute_value(&slf.reader, true); + slf.emitter + .init_attribute_value(slf.reader.position(), true); slf.state = State::AttributeValueDoubleQuoted; Ok(ControlToken::Continue) } Some('\'') => { - slf.emitter.init_attribute_value(&slf.reader, true); + slf.emitter + .init_attribute_value(slf.reader.position(), true); slf.state = State::AttributeValueSingleQuoted; Ok(ControlToken::Continue) } @@ -770,7 +774,8 @@ where Ok(ControlToken::Continue) } c => { - slf.emitter.init_attribute_value(&slf.reader, false); + slf.emitter + .init_attribute_value(slf.reader.position(), false); slf.state = State::AttributeValueUnquoted; slf.unread_char(c); Ok(ControlToken::Continue) @@ -885,7 +890,7 @@ where }, State::SelfClosingStartTag => match slf.read_char()? { Some('>') => { - slf.emitter.set_self_closing(&slf.reader); + slf.emitter.set_self_closing(slf.reader.position()); slf.state = State::Data; slf.emit_current_tag(); Ok(ControlToken::Continue) @@ -923,7 +928,7 @@ where }, State::MarkupDeclarationOpen => match slf.read_char()? { Some('-') if slf.try_read_string("-", true)? 
=> { - slf.emitter.init_comment(&slf.reader); + slf.emitter.init_comment(slf.reader.position()); slf.state = State::CommentStart; Ok(ControlToken::Continue) } @@ -940,14 +945,14 @@ where // let's hope that bogus comment can just sort of skip over cdata slf.emit_error(Error::CdataInHtmlContent); - slf.emitter.init_comment(&slf.reader); + slf.emitter.init_comment(slf.reader.position()); slf.emitter.push_comment("[CDATA["); slf.state = State::BogusComment; Ok(ControlToken::Continue) } c => { slf.emit_error(Error::IncorrectlyOpenedComment); - slf.emitter.init_comment(&slf.reader); + slf.emitter.init_comment(slf.reader.position()); slf.state = State::BogusComment; slf.unread_char(c); Ok(ControlToken::Continue) @@ -1153,7 +1158,7 @@ where } None => { slf.emit_error(Error::EofInDoctype); - slf.emitter.init_doctype(&slf.reader); + slf.emitter.init_doctype(slf.reader.position()); slf.emitter.set_force_quirks(); slf.emitter.emit_current_doctype(); Ok(ControlToken::Eof) @@ -1169,14 +1174,14 @@ where Some(whitespace_pat!()) => Ok(ControlToken::Continue), Some('\0') => { slf.emit_error(Error::UnexpectedNullCharacter); - slf.emitter.init_doctype(&slf.reader); + slf.emitter.init_doctype(slf.reader.position()); slf.emitter.push_doctype_name("\u{fffd}"); slf.state = State::DoctypeName; Ok(ControlToken::Continue) } Some('>') => { slf.emit_error(Error::MissingDoctypeName); - slf.emitter.init_doctype(&slf.reader); + slf.emitter.init_doctype(slf.reader.position()); slf.emitter.set_force_quirks(); slf.state = State::Data; slf.emitter.emit_current_doctype(); @@ -1184,13 +1189,13 @@ where } None => { slf.emit_error(Error::EofInDoctype); - slf.emitter.init_doctype(&slf.reader); + slf.emitter.init_doctype(slf.reader.position()); slf.emitter.set_force_quirks(); slf.emitter.emit_current_doctype(); Ok(ControlToken::Eof) } Some(x) => { - slf.emitter.init_doctype(&slf.reader); + slf.emitter.init_doctype(slf.reader.position()); slf.emitter .push_doctype_name(ctostr!(x.to_ascii_lowercase())); 
slf.state = State::DoctypeName; |