diff options
Diffstat (limited to 'src')
-rw-r--r--  src/default_emitter.rs | 11
-rw-r--r--  src/naive_parser.rs    |  8
-rw-r--r--  src/token.rs           | 11
-rw-r--r--  src/tokenizer.rs       |  5
4 files changed, 21 insertions(+), 14 deletions(-)
| diff --git a/src/default_emitter.rs b/src/default_emitter.rs index a4c5a63..e89fa5e 100644 --- a/src/default_emitter.rs +++ b/src/default_emitter.rs @@ -17,6 +17,7 @@ pub struct DefaultEmitter<O = NoopOffset> {      current_attribute: Option<(String, crate::token::AttrInternal<O>)>,      seen_attributes: BTreeSet<String>,      emitted_tokens: VecDeque<Token<O>>, +    errors: VecDeque<(Error, Range<O>)>,      attr_in_end_tag_span: Option<Range<O>>,  } @@ -28,11 +29,19 @@ impl<O> Default for DefaultEmitter<O> {              current_attribute: None,              seen_attributes: BTreeSet::new(),              emitted_tokens: VecDeque::new(), +            errors: VecDeque::new(),              attr_in_end_tag_span: None,          }      }  } +impl<O> DefaultEmitter<O> { +    /// Removes all encountered tokenizer errors and returns them as an iterator. +    pub fn drain_errors(&mut self) -> impl Iterator<Item = (Error, Range<O>)> + '_ { +        self.errors.drain(0..) +    } +} +  impl<O> Iterator for DefaultEmitter<O> {      type Item = Token<O>; @@ -43,7 +52,7 @@ impl<O> Iterator for DefaultEmitter<O> {  impl<O: Offset> Emitter<O> for DefaultEmitter<O> {      fn report_error(&mut self, error: Error, span: Range<O>) { -        self.emitted_tokens.push_front(Token::Error { error, span }); +        self.errors.push_back((error, span));      }      fn emit_eof(&mut self) { diff --git a/src/naive_parser.rs b/src/naive_parser.rs index 10eb98d..5bf002b 100644 --- a/src/naive_parser.rs +++ b/src/naive_parser.rs @@ -14,11 +14,10 @@ use crate::{Emitter, Event, State, Tokenizer};  /// * it naively emits any CDATA sections as bogus comments, for example:  ///  ///   ``` -///   # use html5tokenizer::{Error, NaiveParser, Tokenizer, Token}; +///   # use html5tokenizer::{NaiveParser, Token};  ///   let html = "<svg><![CDATA[I love SVG]]>";  ///   let mut tokens = NaiveParser::new(html).flatten();  ///   assert!(matches!(tokens.next().unwrap(), Token::StartTag(tag) if tag.name == 
"svg")); -///   assert!(matches!(tokens.next().unwrap(), Token::Error {error: Error::CdataInHtmlContent, ..}));  ///   assert!(matches!(tokens.next().unwrap(), Token::Comment(_bogus_comment)));  ///   ```  /// @@ -59,6 +58,11 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> NaiveParser<R, O, E> {          tokenizer.naively_switch_state = true;          NaiveParser { tokenizer }      } + +    /// Returns a mutable reference to the emitter. +    pub fn emitter_mut(&mut self) -> &mut E { +        self.tokenizer.emitter_mut() +    }  }  impl<R, O, E> Iterator for NaiveParser<R, O, E> diff --git a/src/token.rs b/src/token.rs index 48c90f7..c599cd5 100644 --- a/src/token.rs +++ b/src/token.rs @@ -5,7 +5,6 @@ use std::iter::FromIterator;  use std::ops::{Index, Range};  use crate::offset::Offset; -use crate::Error;  /// A type for the tokens emitted by a WHATWG-compliant HTML tokenizer.  #[derive(Debug, Eq, PartialEq)] @@ -20,16 +19,6 @@ pub enum Token<O> {      Comment(Comment<O>),      /// An HTML doctype declaration.      Doctype(Doctype<O>), -    /// An HTML parsing error. -    /// -    /// Can be skipped over, the tokenizer is supposed to recover from the error and continues with -    /// more tokens afterward. -    Error { -        /// What kind of error occurred. -        error: Error, -        /// The source code span of the error. -        span: Range<O>, -    },  }  /// An HTML start tag, such as `<p>` or `<a>`. diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 7e1e85f..270d3d0 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -111,6 +111,11 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> {      pub fn handle_cdata_open(&mut self, action: CdataAction) {          machine::handle_cdata_open(self, action);      } + +    /// Returns a mutable reference to the emitter. 
+    pub fn emitter_mut(&mut self) -> &mut E { +        &mut self.emitter +    }  }  /// Used by [`Tokenizer::handle_cdata_open`] to determine how to process `<