diff options
-rw-r--r-- | README.md | 17 | ||||
-rw-r--r-- | examples/spans.rs | 34 | ||||
-rw-r--r-- | src/lib.rs | 1 | ||||
-rw-r--r-- | tests/misc.rs | 34 |
4 files changed, 86 insertions, 0 deletions
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -32,6 +32,22 @@ for token in NaiveParser::new(html).flatten() {
 assert_eq!(new_html, "<title>hello world</title>");
 ```
 
+This library can provide source spans. For an example, see
+[`examples/spans.rs`], which produces the following output:
+
+```output id=spans
+note:
+  ┌─ file.html:1:2
+  │
+1 │ <img src=example.jpg alt="some description">
+  │  ^^^ ^^^ ^^^^^^^^^^^ ^^^  ^^^^^^^^^^^^^^^^ attr value
+  │  │   │   │           │
+  │  │   │   │           attr name
+  │  │   │   attr value
+  │  │   attr name
+  │  tag name
+```
+
 ## Limitations
 
 * This crate does not yet implement tree construction
@@ -63,6 +79,7 @@ Licensed under the MIT license, see [the LICENSE file].
 
 
 [parsing model]: https://html.spec.whatwg.org/multipage/parsing.html#overview-of-the-parsing-model
+[`examples/spans.rs`]: ./examples/spans.rs
 [character encoding detection]: https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding
 [html5lib tokenizer test suite]: https://github.com/html5lib/html5lib-tests/tree/master/tokenizer
 [html5gum]: https://crates.io/crates/html5gum
diff --git a/examples/spans.rs b/examples/spans.rs
new file mode 100644
index 0000000..fc3c6a1
--- /dev/null
+++ b/examples/spans.rs
@@ -0,0 +1,34 @@
+use codespan_reporting::{
+    diagnostic::{Diagnostic, Label},
+    files::SimpleFiles,
+    term,
+    term::termcolor::{ColorChoice, StandardStream},
+};
+use html5tokenizer::{offset::PosTrackingReader, NaiveParser, Token};
+
+/// Prints a note diagnostic labelling the spans of the tag name and of each attribute.
+fn main() {
+    let html = r#"<img src=example.jpg alt="some description">"#;
+    let parser = NaiveParser::new(PosTrackingReader::new(html));
+
+    let Token::StartTag(tag) = parser.flatten().next().unwrap() else {
+        panic!("expected the first token to be a start tag")
+    };
+
+    let mut files = SimpleFiles::new();
+    let file_id = files.add("file.html", html);
+
+    let mut labels = vec![Label::primary(file_id, tag.name_span).with_message("tag name")];
+
+    for attr in &tag.attributes {
+        labels.push(Label::primary(file_id, attr.name_span()).with_message("attr name"));
+        labels.push(Label::primary(file_id, attr.value_span().unwrap()).with_message("attr value"));
+    }
+
+    let diagnostic = Diagnostic::note().with_labels(labels);
+
+    // Colors are disabled; a test compares this output against the block in the README.
+    let mut writer = StandardStream::stdout(ColorChoice::Never);
+    let config = term::Config::default();
+    term::emit(&mut writer, &config, &files, &diagnostic).unwrap();
+}
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,6 +2,7 @@
 // This is an HTML parser. HTML can be untrusted input from the internet.
 #![forbid(clippy::undocumented_unsafe_blocks)]
 #![forbid(clippy::multiple_unsafe_ops_per_block)]
+#![doc = concat!("[`examples/spans.rs`]: ", file_url!("examples/spans.rs"))]
 #![doc = concat!("[changelog]: ", file_url!("CHANGELOG.md"))]
 #![doc = concat!("[the LICENSE file]: ", file_url!("LICENSE"))]
 #![doc = include_str!("../README.md")]
diff --git a/tests/misc.rs b/tests/misc.rs
index 416e506..0db0606 100644
--- a/tests/misc.rs
+++ b/tests/misc.rs
@@ -1,3 +1,5 @@
+use std::process::Command;
+
 use similar_asserts::assert_eq;
 use walkdir::{DirEntry, WalkDir};
 
@@ -45,3 +47,35 @@ fn is_source_file(entry: &DirEntry) -> bool {
 
     !filename.starts_with('.') // .git, etc.
 }
+
+/// Checks that the `output id=spans` block in the README matches the
+/// actual output of `examples/spans.rs`.
+#[test]
+fn example_output_in_readme() {
+    let output = Command::new("cargo")
+        .args(["run", "--example", "spans"])
+        .output()
+        .unwrap();
+    // On failure, show cargo's stderr rather than a confusing diff of partial output.
+    assert!(output.status.success(), "{}", String::from_utf8_lossy(&output.stderr));
+
+    let expected = std::str::from_utf8(&output.stdout)
+        .unwrap()
+        .trim_end()
+        .lines()
+        .map(|s| s.trim_end().to_string())
+        .collect::<Vec<_>>()
+        .join("\n");
+
+    // Reading the whole file up front turns any I/O error into a panic.
+    let readme = std::fs::read_to_string("README.md").unwrap();
+    let actual = readme
+        .lines()
+        .skip_while(|l| *l != "```output id=spans")
+        .skip(1)
+        .take_while(|l| *l != "```")
+        .collect::<Vec<_>>()
+        .join("\n");
+
+    assert_eq!(actual, expected);
+}