diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/default_emitter.rs | 11 | ||||
-rw-r--r-- | src/naive_parser.rs | 8 | ||||
-rw-r--r-- | src/token.rs | 11 | ||||
-rw-r--r-- | src/tokenizer.rs | 5 |
4 files changed, 21 insertions, 14 deletions
diff --git a/src/default_emitter.rs b/src/default_emitter.rs
index a4c5a63..e89fa5e 100644
--- a/src/default_emitter.rs
+++ b/src/default_emitter.rs
@@ -17,6 +17,7 @@ pub struct DefaultEmitter<O = NoopOffset> {
     current_attribute: Option<(String, crate::token::AttrInternal<O>)>,
     seen_attributes: BTreeSet<String>,
     emitted_tokens: VecDeque<Token<O>>,
+    errors: VecDeque<(Error, Range<O>)>,
     attr_in_end_tag_span: Option<Range<O>>,
 }
 
@@ -28,11 +29,19 @@ impl<O> Default for DefaultEmitter<O> {
             current_attribute: None,
             seen_attributes: BTreeSet::new(),
             emitted_tokens: VecDeque::new(),
+            errors: VecDeque::new(),
             attr_in_end_tag_span: None,
         }
     }
 }
 
+impl<O> DefaultEmitter<O> {
+    /// Removes all encountered tokenizer errors and returns them as an iterator.
+    pub fn drain_errors(&mut self) -> impl Iterator<Item = (Error, Range<O>)> + '_ {
+        self.errors.drain(0..)
+    }
+}
+
 impl<O> Iterator for DefaultEmitter<O> {
     type Item = Token<O>;
 
@@ -43,7 +52,7 @@ impl<O> Iterator for DefaultEmitter<O> {
 
 impl<O: Offset> Emitter<O> for DefaultEmitter<O> {
     fn report_error(&mut self, error: Error, span: Range<O>) {
-        self.emitted_tokens.push_front(Token::Error { error, span });
+        self.errors.push_back((error, span));
     }
 
     fn emit_eof(&mut self) {
diff --git a/src/naive_parser.rs b/src/naive_parser.rs
index 10eb98d..5bf002b 100644
--- a/src/naive_parser.rs
+++ b/src/naive_parser.rs
@@ -14,11 +14,10 @@ use crate::{Emitter, Event, State, Tokenizer};
 /// * it naively emits any CDATA sections as bogus comments, for example:
 ///
 /// ```
-/// # use html5tokenizer::{Error, NaiveParser, Tokenizer, Token};
+/// # use html5tokenizer::{NaiveParser, Token};
 /// let html = "<svg><![CDATA[I love SVG]]>";
 /// let mut tokens = NaiveParser::new(html).flatten();
 /// assert!(matches!(tokens.next().unwrap(), Token::StartTag(tag) if tag.name == "svg"));
-/// assert!(matches!(tokens.next().unwrap(), Token::Error {error: Error::CdataInHtmlContent, ..}));
 /// assert!(matches!(tokens.next().unwrap(), Token::Comment(_bogus_comment)));
 /// ```
 ///
@@ -59,6 +58,11 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> NaiveParser<R, O, E> {
         tokenizer.naively_switch_state = true;
         NaiveParser { tokenizer }
     }
+
+    /// Returns a mutable reference to the emitter.
+    pub fn emitter_mut(&mut self) -> &mut E {
+        self.tokenizer.emitter_mut()
+    }
 }
 
 impl<R, O, E> Iterator for NaiveParser<R, O, E>
diff --git a/src/token.rs b/src/token.rs
index 48c90f7..c599cd5 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -5,7 +5,6 @@ use std::iter::FromIterator;
 use std::ops::{Index, Range};
 
 use crate::offset::Offset;
-use crate::Error;
 
 /// A type for the tokens emitted by a WHATWG-compliant HTML tokenizer.
 #[derive(Debug, Eq, PartialEq)]
@@ -20,16 +19,6 @@ pub enum Token<O> {
     Comment(Comment<O>),
     /// An HTML doctype declaration.
     Doctype(Doctype<O>),
-    /// An HTML parsing error.
-    ///
-    /// Can be skipped over, the tokenizer is supposed to recover from the error and continues with
-    /// more tokens afterward.
-    Error {
-        /// What kind of error occurred.
-        error: Error,
-        /// The source code span of the error.
-        span: Range<O>,
-    },
 }
 
 /// An HTML start tag, such as `<p>` or `<a>`.
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 7e1e85f..270d3d0 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -111,6 +111,11 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {
     pub fn handle_cdata_open(&mut self, action: CdataAction) {
         machine::handle_cdata_open(self, action);
     }
+
+    /// Returns a mutable reference to the emitter.
+    pub fn emitter_mut(&mut self) -> &mut E {
+        &mut self.emitter
+    }
 }
 
 /// Used by [`Tokenizer::handle_cdata_open`] to determine how to process `<![CDATA[`