diff options
author | Martin Fischer <martin@push-f.com> | 2021-11-30 07:28:21 +0100 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2021-11-30 11:22:35 +0100 |
commit | 14f1a85d994ad97dae3d9de735fc51adb25d390a (patch) | |
tree | 0fa0d7c173a19dcb7117132325a801808302bcf8 /src/tokenizer/mod.rs | |
parent | baf1477c587fe22d27e94408cf2505d588ba007e (diff) |
introduce Error enum
Diffstat (limited to 'src/tokenizer/mod.rs')
-rw-r--r-- | src/tokenizer/mod.rs | 40 |
1 files changed, 18 insertions, 22 deletions
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index 4511cf8..78101f6 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -9,6 +9,7 @@ //! The HTML5 tokenizer. +use self::error::InternalState; pub use self::interface::{Attribute, Doctype, Tag, TagKind, Token}; use self::interface::{CharacterTokens, EOFToken, NullCharacterToken, ParseError}; use self::interface::{CommentToken, DoctypeToken, EndTag, StartTag, TagToken}; @@ -21,9 +22,9 @@ use self::states::{Rawtext, Rcdata, ScriptData, ScriptDataEscaped}; use self::char_ref::{CharRef, CharRefTokenizer}; +use crate::error::Error; use crate::util::{smallcharset::SmallCharSet, str::lower_ascii_letter}; -use std::borrow::Cow::{self, Borrowed}; use std::collections::BTreeMap; use std::default::Default; use std::mem::replace; @@ -34,6 +35,7 @@ use crate::util::buffer_queue::{FromSet, NotFromSet, SetResult}; pub use states::RawKind; mod char_ref; +pub mod error; mod interface; mod states; @@ -292,8 +294,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { _ => false, } { - let msg = format!("Bad character {}", c); - self.emit_error(Cow::Owned(msg)); + self.emit_error(Error::BadCharacter(c)); } #[cfg(feature = "spans")] @@ -400,24 +401,14 @@ impl<Sink: TokenSink> Tokenizer<Sink> { } fn bad_char_error(&mut self) { - let msg = format_if!( - self.opts.exact_errors, - "Bad character", - "Saw {} in state {:?}", + self.emit_error(Error::UnexpectedCharacter( self.current_char, - self.state - ); - self.emit_error(msg); + InternalState(self.state), + )); } fn bad_eof_error(&mut self) { - let msg = format_if!( - self.opts.exact_errors, - "Unexpected EOF", - "Saw EOF in state {:?}", - self.state - ); - self.emit_error(msg); + self.emit_error(Error::UnexpectedEOF(InternalState(self.state))); } fn emit_char(&mut self, c: char) { @@ -444,10 +435,10 @@ impl<Sink: TokenSink> Tokenizer<Sink> { } EndTag => { if !self.current_tag_attrs.is_empty() { - self.emit_error(Borrowed("Attributes on an end tag")); + self.emit_error(Error::AttributesOnEndTag); } if self.current_tag_self_closing { - self.emit_error(Borrowed("Self-closing end tag")); + self.emit_error(Error::SelfClosingEndTag); } } } @@ -547,7 +538,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { }; if dup { - self.emit_error(Borrowed("Duplicate attribute")); + self.emit_error(Error::DuplicateAttribute); self.current_attr_name.clear(); self.current_attr_value.clear(); } else { @@ -606,7 +597,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { self.get_char(input); } - fn emit_error(&mut self, error: Cow<'static, str>) { + fn emit_error(&mut self, error: Error) { self.process_token_and_continue(ParseError(error)); } } @@ -2451,11 +2442,16 @@ mod test { #[test] #[cfg(feature = "named-entities")] fn named_entities() { + use crate::error::{CharRefError, Error}; + let opts = opts(); let vector = vec![String::from("&\r\n"), String::from("&aamp;\r\n")]; let expected = vec![ (3, CharacterTokens("&\n".into())), - (3, ParseError("Invalid character reference".into())), + ( + 3, + ParseError(Error::CharRef(CharRefError::InvalidNamedCharRef)), + ), (4, CharacterTokens("&aamp;\n".into())), ]; let results = tokenize(vector, opts); |