about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2023-09-12 07:39:17 +0200
committer Martin Fischer <martin@push-f.com> 2023-09-12 07:41:46 +0200
commit 4b4a7ef0d14dae620230beac0a328bc250654446 (patch)
tree 591e955617490a364076f976c7083cb72a3cd0a5 /src
parent 0695ee9ca63cc3faa51378b35381d62f5a00b846 (diff)
docs: move warning from DefaultEmitter to Tokenizer
Diffstat (limited to 'src')
-rw-r--r-- src/default_emitter.rs 16
-rw-r--r-- src/tokenizer.rs 18
2 files changed, 16 insertions, 18 deletions
diff --git a/src/default_emitter.rs b/src/default_emitter.rs
index c957b20..9d5ab52 100644
--- a/src/default_emitter.rs
+++ b/src/default_emitter.rs
@@ -11,22 +11,6 @@ use crate::Emitter;
use crate::Error;
/// The default implementation of [`Emitter`], used to produce tokens.
-///
-/// # Warning
-///
-/// * Using the DefaultEmitter without calling [`Tokenizer::set_state`]
-/// results in wrong state transitions:
-///
-/// ```
-/// # use html5tokenizer::{DefaultEmitter, Event, Tokenizer, Token};
-/// let emitter = DefaultEmitter::default();
-/// let html = "<script><b>";
-/// let mut tokens = Tokenizer::new(html, emitter).flatten();
-/// assert!(matches!(tokens.next().unwrap(), Event::Token(Token::StartTag(tag)) if tag.name == "script"));
-/// assert!(matches!(tokens.next().unwrap(), Event::Token(Token::StartTag(tag)) if tag.name == "b"));
-/// ```
-///
-/// [`Tokenizer::set_state`]: crate::Tokenizer::set_state
pub struct DefaultEmitter<O = NoopOffset> {
current_characters: String,
current_token: Option<Token<O>>,
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index cfd8eea..a37a832 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -33,9 +33,23 @@ impl<T: Copy> Stack2<T> {
/// An HTML tokenizer.
///
-/// Note that for proper HTML parsing, you'll have to implement [tree construction]
-/// based on this Tokenizer yourself (since this crate currently does not implement it).
+/// # Warning
///
+/// Iterating over the tokenizer directly without calling [`Tokenizer::set_state`]
+/// results in wrong state transitions:
+///
+/// ```
+/// # use html5tokenizer::{DefaultEmitter, Event, Tokenizer, Token};
+/// let emitter = DefaultEmitter::default();
+/// let html = "<script><b>";
+/// let mut tokens = Tokenizer::new(html, emitter).flatten();
+/// assert!(matches!(tokens.next(), Some(Event::Token(Token::StartTag(_)))));
+/// assert!(matches!(tokens.next(), Some(Event::Token(Token::StartTag(_)))));
+/// ```
+///
+/// Instead use the [`NaiveParser`] (in the future this crate will also provide a proper implementation of [tree construction]).
+///
+/// [`NaiveParser`]: crate::NaiveParser
/// [tree construction]: https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
pub struct Tokenizer<R: Reader, O, E: Emitter<O>> {
eof: bool,