summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/default_emitter.rs11
-rw-r--r--src/naive_parser.rs8
-rw-r--r--src/token.rs11
-rw-r--r--src/tokenizer.rs5
4 files changed, 21 insertions, 14 deletions
diff --git a/src/default_emitter.rs b/src/default_emitter.rs
index a4c5a63..e89fa5e 100644
--- a/src/default_emitter.rs
+++ b/src/default_emitter.rs
@@ -17,6 +17,7 @@ pub struct DefaultEmitter<O = NoopOffset> {
current_attribute: Option<(String, crate::token::AttrInternal<O>)>,
seen_attributes: BTreeSet<String>,
emitted_tokens: VecDeque<Token<O>>,
+ errors: VecDeque<(Error, Range<O>)>,
attr_in_end_tag_span: Option<Range<O>>,
}
@@ -28,11 +29,19 @@ impl<O> Default for DefaultEmitter<O> {
current_attribute: None,
seen_attributes: BTreeSet::new(),
emitted_tokens: VecDeque::new(),
+ errors: VecDeque::new(),
attr_in_end_tag_span: None,
}
}
}
+impl<O> DefaultEmitter<O> {
+ /// Removes all encountered tokenizer errors and returns them as an iterator.
+ pub fn drain_errors(&mut self) -> impl Iterator<Item = (Error, Range<O>)> + '_ {
+ self.errors.drain(0..)
+ }
+}
+
impl<O> Iterator for DefaultEmitter<O> {
type Item = Token<O>;
@@ -43,7 +52,7 @@ impl<O> Iterator for DefaultEmitter<O> {
impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
fn report_error(&mut self, error: Error, span: Range<O>) {
- self.emitted_tokens.push_front(Token::Error { error, span });
+ self.errors.push_back((error, span));
}
fn emit_eof(&mut self) {
diff --git a/src/naive_parser.rs b/src/naive_parser.rs
index 10eb98d..5bf002b 100644
--- a/src/naive_parser.rs
+++ b/src/naive_parser.rs
@@ -14,11 +14,10 @@ use crate::{Emitter, Event, State, Tokenizer};
/// * it naively emits any CDATA sections as bogus comments, for example:
///
/// ```
-/// # use html5tokenizer::{Error, NaiveParser, Tokenizer, Token};
+/// # use html5tokenizer::{NaiveParser, Token};
/// let html = "<svg><![CDATA[I love SVG]]>";
/// let mut tokens = NaiveParser::new(html).flatten();
/// assert!(matches!(tokens.next().unwrap(), Token::StartTag(tag) if tag.name == "svg"));
-/// assert!(matches!(tokens.next().unwrap(), Token::Error {error: Error::CdataInHtmlContent, ..}));
/// assert!(matches!(tokens.next().unwrap(), Token::Comment(_bogus_comment)));
/// ```
///
@@ -59,6 +58,11 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> NaiveParser<R, O, E> {
tokenizer.naively_switch_state = true;
NaiveParser { tokenizer }
}
+
+ /// Returns a mutable reference to the emitter.
+ pub fn emitter_mut(&mut self) -> &mut E {
+ self.tokenizer.emitter_mut()
+ }
}
impl<R, O, E> Iterator for NaiveParser<R, O, E>
diff --git a/src/token.rs b/src/token.rs
index 48c90f7..c599cd5 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -5,7 +5,6 @@ use std::iter::FromIterator;
use std::ops::{Index, Range};
use crate::offset::Offset;
-use crate::Error;
/// A type for the tokens emitted by a WHATWG-compliant HTML tokenizer.
#[derive(Debug, Eq, PartialEq)]
@@ -20,16 +19,6 @@ pub enum Token<O> {
Comment(Comment<O>),
/// An HTML doctype declaration.
Doctype(Doctype<O>),
- /// An HTML parsing error.
- ///
- /// Can be skipped over, the tokenizer is supposed to recover from the error and continues with
- /// more tokens afterward.
- Error {
- /// What kind of error occurred.
- error: Error,
- /// The source code span of the error.
- span: Range<O>,
- },
}
/// An HTML start tag, such as `<p>` or `<a>`.
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 7e1e85f..270d3d0 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -111,6 +111,11 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
pub fn handle_cdata_open(&mut self, action: CdataAction) {
machine::handle_cdata_open(self, action);
}
+
+ /// Returns a mutable reference to the emitter.
+ pub fn emitter_mut(&mut self) -> &mut E {
+ &mut self.emitter
+ }
}
/// Used by [`Tokenizer::handle_cdata_open`] to determine how to process `<![CDATA[`