diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/naive_parser.rs | 13 | 
1 files changed, 12 insertions, 1 deletions
| diff --git a/src/naive_parser.rs b/src/naive_parser.rs index f126dfd..f42072a 100644 --- a/src/naive_parser.rs +++ b/src/naive_parser.rs @@ -7,9 +7,20 @@ use crate::{Emitter, Event, State, Tokenizer};  /// A naive HTML parser (**not** spec-compliant since it doesn't do tree construction).  ///  /// * it **does not** correct [misnested tags] +///  /// * it **does not** recognize implicitly self-closing elements like  ///  `<img>`, it will simply emit a start token -/// * it naively emits any CDATA sections as bogus comments +/// +/// * it naively emits any CDATA sections as bogus comments, for example: +/// +///   ``` +///   # use html5tokenizer::{Error, NaiveParser, Tokenizer, Token}; +///   let html = "<svg><![CDATA[I love SVG]]>"; +///   let mut tokens = NaiveParser::new(html).flatten(); +///   assert!(matches!(tokens.next().unwrap(), Token::StartTag(tag) if tag.name == "svg")); +///   assert!(matches!(tokens.next().unwrap(), Token::Error {error: Error::CdataInHtmlContent, ..})); +///   assert!(matches!(tokens.next().unwrap(), Token::Comment(_bogus_comment))); +///   ```  ///  /// It has similar caveats to the [HTMLParser] from the Python standard library.  /// It should suffice for web scraping but you wouldn't use it to implement a browser. | 
