diff options
Diffstat (limited to 'src/tokenizer.rs')
-rw-r--r-- | src/tokenizer.rs | 18 |
1 files changed, 16 insertions, 2 deletions
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index cfd8eea..a37a832 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -33,9 +33,23 @@ impl<T: Copy> Stack2<T> {
 
 /// An HTML tokenizer.
 ///
-/// Note that for proper HTML parsing, you'll have to implement [tree construction]
-/// based on this Tokenizer yourself (since this crate currently does not implement it).
+/// # Warning
 ///
+/// Iterating over the tokenizer directly without calling [`Tokenizer::set_state`]
+/// results in wrong state transitions:
+///
+/// ```
+/// # use html5tokenizer::{DefaultEmitter, Event, Tokenizer, Token};
+/// let emitter = DefaultEmitter::default();
+/// let html = "<script><b>";
+/// let mut tokens = Tokenizer::new(html, emitter).flatten();
+/// assert!(matches!(tokens.next(), Some(Event::Token(Token::StartTag(_)))));
+/// assert!(matches!(tokens.next(), Some(Event::Token(Token::StartTag(_)))));
+/// ```
+///
+/// Instead use the [`NaiveParser`] (in the future this crate will also provide a proper implementation of [tree construction]).
+///
+/// [`NaiveParser`]: crate::NaiveParser
 /// [tree construction]: https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
 pub struct Tokenizer<R: Reader, O, E: Emitter<O>> {
     eof: bool,