about | summary | refs | log | tree | commit | diff
path: root/src/tokenizer/mod.rs
diff options
context:
space:
mode:
author: Martin Fischer <martin@push-f.com> 2021-11-30 07:28:21 +0100
committer: Martin Fischer <martin@push-f.com> 2021-11-30 11:22:35 +0100
commit: 14f1a85d994ad97dae3d9de735fc51adb25d390a (patch)
tree: 0fa0d7c173a19dcb7117132325a801808302bcf8 /src/tokenizer/mod.rs
parent: baf1477c587fe22d27e94408cf2505d588ba007e (diff)
introduce Error enum
Diffstat (limited to 'src/tokenizer/mod.rs')
-rw-r--r-- src/tokenizer/mod.rs 40
1 files changed, 18 insertions, 22 deletions
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index 4511cf8..78101f6 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -9,6 +9,7 @@
//! The HTML5 tokenizer.
+use self::error::InternalState;
pub use self::interface::{Attribute, Doctype, Tag, TagKind, Token};
use self::interface::{CharacterTokens, EOFToken, NullCharacterToken, ParseError};
use self::interface::{CommentToken, DoctypeToken, EndTag, StartTag, TagToken};
@@ -21,9 +22,9 @@ use self::states::{Rawtext, Rcdata, ScriptData, ScriptDataEscaped};
use self::char_ref::{CharRef, CharRefTokenizer};
+use crate::error::Error;
use crate::util::{smallcharset::SmallCharSet, str::lower_ascii_letter};
-use std::borrow::Cow::{self, Borrowed};
use std::collections::BTreeMap;
use std::default::Default;
use std::mem::replace;
@@ -34,6 +35,7 @@ use crate::util::buffer_queue::{FromSet, NotFromSet, SetResult};
pub use states::RawKind;
mod char_ref;
+pub mod error;
mod interface;
mod states;
@@ -292,8 +294,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
_ => false,
}
{
- let msg = format!("Bad character {}", c);
- self.emit_error(Cow::Owned(msg));
+ self.emit_error(Error::BadCharacter(c));
}
#[cfg(feature = "spans")]
@@ -400,24 +401,14 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
}
fn bad_char_error(&mut self) {
- let msg = format_if!(
- self.opts.exact_errors,
- "Bad character",
- "Saw {} in state {:?}",
+ self.emit_error(Error::UnexpectedCharacter(
self.current_char,
- self.state
- );
- self.emit_error(msg);
+ InternalState(self.state),
+ ));
}
fn bad_eof_error(&mut self) {
- let msg = format_if!(
- self.opts.exact_errors,
- "Unexpected EOF",
- "Saw EOF in state {:?}",
- self.state
- );
- self.emit_error(msg);
+ self.emit_error(Error::UnexpectedEOF(InternalState(self.state)));
}
fn emit_char(&mut self, c: char) {
@@ -444,10 +435,10 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
}
EndTag => {
if !self.current_tag_attrs.is_empty() {
- self.emit_error(Borrowed("Attributes on an end tag"));
+ self.emit_error(Error::AttributesOnEndTag);
}
if self.current_tag_self_closing {
- self.emit_error(Borrowed("Self-closing end tag"));
+ self.emit_error(Error::SelfClosingEndTag);
}
}
}
@@ -547,7 +538,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
};
if dup {
- self.emit_error(Borrowed("Duplicate attribute"));
+ self.emit_error(Error::DuplicateAttribute);
self.current_attr_name.clear();
self.current_attr_value.clear();
} else {
@@ -606,7 +597,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
self.get_char(input);
}
- fn emit_error(&mut self, error: Cow<'static, str>) {
+ fn emit_error(&mut self, error: Error) {
self.process_token_and_continue(ParseError(error));
}
}
@@ -2451,11 +2442,16 @@ mod test {
#[test]
#[cfg(feature = "named-entities")]
fn named_entities() {
+ use crate::error::{CharRefError, Error};
+
let opts = opts();
let vector = vec![String::from("&amp;\r\n"), String::from("&aamp;\r\n")];
let expected = vec![
(3, CharacterTokens("&\n".into())),
- (3, ParseError("Invalid character reference".into())),
+ (
+ 3,
+ ParseError(Error::CharRef(CharRefError::InvalidNamedCharRef)),
+ ),
(4, CharacterTokens("&aamp;\n".into())),
];
let results = tokenize(vector, opts);