aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- README.md 17
-rw-r--r-- examples/spans.rs 34
-rw-r--r-- src/lib.rs 1
-rw-r--r-- tests/misc.rs 34
4 files changed, 86 insertions, 0 deletions
diff --git a/README.md b/README.md
index 2683a6d..740d857 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,22 @@ for token in NaiveParser::new(html).flatten() {
assert_eq!(new_html, "<title>hello world</title>");
```
+This library can provide source spans. For an example, see
+[`examples/spans.rs`], which produces the following output:
+
+```output id=spans
+note:
+ ┌─ file.html:1:2
+ │
+1 │ <img src=example.jpg alt="some description">
+ │ ^^^ ^^^ ^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^ attr value
+ │ │ │ │ │
+ │ │ │ │ attr name
+ │ │ │ attr value
+ │ │ attr name
+ │ tag name
+```
+
## Limitations
* This crate does not yet implement tree construction
@@ -63,6 +79,7 @@ Licensed under the MIT license, see [the LICENSE file].
[parsing model]: https://html.spec.whatwg.org/multipage/parsing.html#overview-of-the-parsing-model
+[`examples/spans.rs`]: ./examples/spans.rs
[character encoding detection]: https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding
[html5lib tokenizer test suite]: https://github.com/html5lib/html5lib-tests/tree/master/tokenizer
[html5gum]: https://crates.io/crates/html5gum
diff --git a/examples/spans.rs b/examples/spans.rs
new file mode 100644
index 0000000..fc3c6a1
--- /dev/null
+++ b/examples/spans.rs
@@ -0,0 +1,34 @@
+use codespan_reporting::{
+ diagnostic::{Diagnostic, Label},
+ files::SimpleFiles,
+ term,
+ term::termcolor::{ColorChoice, StandardStream},
+};
+use html5tokenizer::{offset::PosTrackingReader, NaiveParser, Token};
+
+/// Tokenizes a single `<img>` tag and prints a codespan diagnostic to stdout
+/// that labels the source spans of the tag name and of every attribute name
+/// and attribute value.
+fn main() {
+    let html = r#"<img src=example.jpg alt="some description">"#;
+
+    // Only the first token is of interest; the sample input begins with a start tag.
+    let first_token = NaiveParser::new(PosTrackingReader::new(html))
+        .flatten()
+        .next()
+        .unwrap();
+    let Token::StartTag(tag) = first_token else {
+        panic!()
+    };
+
+    let mut files = SimpleFiles::new();
+    let file_id = files.add("file.html", html);
+
+    // One label for the tag name, followed by a name/value label pair per attribute.
+    let tag_label = Label::primary(file_id, tag.name_span).with_message("tag name");
+    let attr_labels = (&tag.attributes).into_iter().flat_map(|attr| {
+        [
+            Label::primary(file_id, attr.name_span()).with_message("attr name"),
+            Label::primary(file_id, attr.value_span().unwrap()).with_message("attr value"),
+        ]
+    });
+    let labels: Vec<_> = std::iter::once(tag_label).chain(attr_labels).collect();
+
+    let diagnostic = Diagnostic::note().with_labels(labels);
+
+    // Colors are disabled so the printed output is plain text.
+    let mut writer = StandardStream::stdout(ColorChoice::Never);
+    let config = term::Config::default();
+    term::emit(&mut writer, &config, &files, &diagnostic).unwrap();
+}
diff --git a/src/lib.rs b/src/lib.rs
index 5638a02..6f0cdd0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,6 +2,7 @@
// This is an HTML parser. HTML can be untrusted input from the internet.
#![forbid(clippy::undocumented_unsafe_blocks)]
#![forbid(clippy::multiple_unsafe_ops_per_block)]
+#![doc = concat!("[`examples/spans.rs`]: ", file_url!("examples/spans.rs"))]
#![doc = concat!("[changelog]: ", file_url!("CHANGELOG.md"))]
#![doc = concat!("[the LICENSE file]: ", file_url!("LICENSE"))]
#![doc = include_str!("../README.md")]
diff --git a/tests/misc.rs b/tests/misc.rs
index 416e506..0db0606 100644
--- a/tests/misc.rs
+++ b/tests/misc.rs
@@ -1,3 +1,9 @@
+use std::{
+ fs::File,
+ io::{BufRead, BufReader},
+ process::Command,
+};
+
use similar_asserts::assert_eq;
use walkdir::{DirEntry, WalkDir};
@@ -45,3 +51,31 @@ fn is_source_file(entry: &DirEntry) -> bool {
!filename.starts_with('.') // .git, etc.
}
+
+/// Checks that the `output id=spans` code block in `README.md` matches what
+/// `cargo run --example spans` prints to stdout.
+///
+/// Trailing whitespace is stripped from every line of the example output (and
+/// from its end) before the comparison; the README lines are compared as-is.
+#[test]
+fn example_output_in_readme() {
+    let output = Command::new("cargo")
+        .args(["run", "--example", "spans"])
+        .output()
+        .unwrap();
+
+    // Surface cargo's own diagnostics instead of a confusing diff against
+    // empty stdout when the example fails to build or run.
+    assert!(
+        output.status.success(),
+        "`cargo run --example spans` failed:\n{}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+
+    let expected = std::str::from_utf8(&output.stdout)
+        .unwrap()
+        .trim_end()
+        .lines()
+        .map(|s| s.trim_end().to_string())
+        .collect::<Vec<_>>()
+        .join("\n");
+
+    let actual = BufReader::new(File::open("README.md").unwrap())
+        .lines()
+        // Unwrap rather than `flatten()`: flattening silently skips `Err`
+        // items and can keep polling a reader that fails on every read.
+        .map(|line| line.unwrap())
+        // Keep only the lines between the opening fence and the next closing fence.
+        .skip_while(|l| l != "```output id=spans")
+        .skip(1)
+        .take_while(|l| l != "```")
+        .collect::<Vec<_>>()
+        .join("\n");
+
+    assert_eq!(actual, expected);
+}