docs: add example for NaiveParser's CDATA handling

author: Martin Fischer <martin@push-f.com> 2023-08-28 13:32:17 +0200
committer: Martin Fischer <martin@push-f.com> 2023-09-03 23:00:05 +0200
commit: ef0f2208992121a45019dce4b5753638607eeb06 (patch)
tree: ee02a7c16659beb3724f7a1c61bda50f77b5b546 /src
parent: c993dfb0c071d5015d019ffa94b438214a842975 (diff)
1 file changed, 12 insertions, 1 deletion
diff --git a/src/naive_parser.rs b/src/naive_parser.rs
index f126dfd..f42072a 100644
--- a/src/naive_parser.rs
+++ b/src/naive_parser.rs
@@ -7,9 +7,20 @@ use crate::{Emitter, Event, State, Tokenizer};
 /// A naive HTML parser (**not** spec-compliant since it doesn't do tree construction).
 ///
 /// * it **does not** correct [misnested tags]
+///
 /// * it **does not** recognize implicitly self-closing elements like
 ///  `<img>`, it will simply emit a start token
-/// * it naively emits any CDATA sections as bogus comments
+///
+/// * it naively emits any CDATA sections as bogus comments, for example:
+///
+///   ```
+///   # use html5tokenizer::{Error, NaiveParser, Tokenizer, Token};
+///   let html = "<svg><![CDATA[I love SVG]]>";
+///   let mut tokens = NaiveParser::new(html).flatten();
+///   assert!(matches!(tokens.next().unwrap(), Token::StartTag(tag) if tag.name == "svg"));
+///   assert!(matches!(tokens.next().unwrap(), Token::Error {error: Error::CdataInHtmlContent, ..}));
+///   assert!(matches!(tokens.next().unwrap(), Token::Comment(_bogus_comment)));
+///   ```
 ///
 /// It has similar caveats to the [HTMLParser] from the Python standard library.
 /// It should suffice for web scraping but you wouldn't use it to implement a browser.
author	Martin Fischer <martin@push-f.com>	2023-08-28 13:32:17 +0200
committer	Martin Fischer <martin@push-f.com>	2023-09-03 23:00:05 +0200
commit	ef0f2208992121a45019dce4b5753638607eeb06 (patch)
tree	ee02a7c16659beb3724f7a1c61bda50f77b5b546 /src
parent	c993dfb0c071d5015d019ffa94b438214a842975 (diff)