aboutsummaryrefslogtreecommitdiff
path: root/src/tokenizer.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/tokenizer.rs')
-rw-r--r-- src/tokenizer.rs 18
1 files changed, 16 insertions, 2 deletions
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index cfd8eea..a37a832 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -33,9 +33,23 @@ impl<T: Copy> Stack2<T> {
/// An HTML tokenizer.
///
-/// Note that for proper HTML parsing, you'll have to implement [tree construction]
-/// based on this Tokenizer yourself (since this crate currently does not implement it).
+/// # Warning
///
+/// Iterating over the tokenizer directly without calling [`Tokenizer::set_state`]
+/// results in wrong state transitions (below, `<b>` following `<script>` is emitted as a start tag):
+///
+/// ```
+/// # use html5tokenizer::{DefaultEmitter, Event, Tokenizer, Token};
+/// let emitter = DefaultEmitter::default();
+/// let html = "<script><b>";
+/// let mut tokens = Tokenizer::new(html, emitter).flatten();
+/// assert!(matches!(tokens.next(), Some(Event::Token(Token::StartTag(_)))));
+/// assert!(matches!(tokens.next(), Some(Event::Token(Token::StartTag(_)))));
+/// ```
+///
+/// Instead, use the [`NaiveParser`] (in the future, this crate will also provide a proper implementation of [tree construction]).
+///
+/// [`NaiveParser`]: crate::NaiveParser
/// [tree construction]: https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
pub struct Tokenizer<R: Reader, O, E: Emitter<O>> {
eof: bool,