From 0c590e90564870fa8600460c866c7395b7d865cd Mon Sep 17 00:00:00 2001
From: Martin Fischer <martin@push-f.com>
Date: Sun, 3 Sep 2023 23:02:01 +0200
Subject: docs: add spans example

---
 README.md         | 17 +++++++++++++++++
 examples/spans.rs | 34 ++++++++++++++++++++++++++++++++++
 src/lib.rs        |  1 +
 tests/misc.rs     | 34 ++++++++++++++++++++++++++++++++++
 4 files changed, 86 insertions(+)
 create mode 100644 examples/spans.rs

diff --git a/README.md b/README.md
index 2683a6d..740d857 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,22 @@ for token in NaiveParser::new(html).flatten() {
 assert_eq!(new_html, "<title>hello world</title>");
 ```
 
+This library can provide source spans. For an example, see
+[`examples/spans.rs`], which produces the following output:
+
+```output id=spans
+note:
+  ┌─ file.html:1:2
+  │
+1 │ <img src=example.jpg alt="some description">
+  │  ^^^ ^^^ ^^^^^^^^^^^ ^^^  ^^^^^^^^^^^^^^^^ attr value
+  │  │   │   │           │
+  │  │   │   │           attr name
+  │  │   │   attr value
+  │  │   attr name
+  │  tag name
+```
+
 ## Limitations
 
 * This crate does not yet implement tree construction  
@@ -63,6 +79,7 @@ Licensed under the MIT license, see [the LICENSE file].
 
 
 [parsing model]: https://html.spec.whatwg.org/multipage/parsing.html#overview-of-the-parsing-model
+[`examples/spans.rs`]: ./examples/spans.rs
 [character encoding detection]: https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding
 [html5lib tokenizer test suite]: https://github.com/html5lib/html5lib-tests/tree/master/tokenizer
 [html5gum]: https://crates.io/crates/html5gum
diff --git a/examples/spans.rs b/examples/spans.rs
new file mode 100644
index 0000000..fc3c6a1
--- /dev/null
+++ b/examples/spans.rs
@@ -0,0 +1,34 @@
+use codespan_reporting::{
+    diagnostic::{Diagnostic, Label},
+    files::SimpleFiles,
+    term,
+    term::termcolor::{ColorChoice, StandardStream},
+};
+use html5tokenizer::{offset::PosTrackingReader, NaiveParser, Token};
+
+fn main() {
+    // Tokenize the input and take the first token, which must be a start tag.
+    let source = r#"<img src=example.jpg alt="some description">"#;
+    let first_token = NaiveParser::new(PosTrackingReader::new(source))
+        .flatten()
+        .next()
+        .unwrap();
+    let Token::StartTag(tag) = first_token else {
+        panic!()
+    };
+
+    // Register the input so the diagnostic can be rendered against it.
+    let mut files = SimpleFiles::new();
+    let file_id = files.add("file.html", source);
+
+    // One label for the tag name, then a name/value label pair per attribute.
+    let mut labels = vec![Label::primary(file_id, tag.name_span).with_message("tag name")];
+    for attr in &tag.attributes {
+        let (name, value) = (attr.name_span(), attr.value_span().unwrap());
+        labels.push(Label::primary(file_id, name).with_message("attr name"));
+        labels.push(Label::primary(file_id, value).with_message("attr value"));
+    }
+    let note = Diagnostic::note().with_labels(labels);
+    let mut stdout = StandardStream::stdout(ColorChoice::Never);
+    term::emit(&mut stdout, &term::Config::default(), &files, &note).unwrap();
+}
diff --git a/src/lib.rs b/src/lib.rs
index 5638a02..6f0cdd0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,6 +2,7 @@
 // This is an HTML parser. HTML can be untrusted input from the internet.
 #![forbid(clippy::undocumented_unsafe_blocks)]
 #![forbid(clippy::multiple_unsafe_ops_per_block)]
+#![doc = concat!("[`examples/spans.rs`]: ", file_url!("examples/spans.rs"))]
 #![doc = concat!("[changelog]: ", file_url!("CHANGELOG.md"))]
 #![doc = concat!("[the LICENSE file]: ", file_url!("LICENSE"))]
 #![doc = include_str!("../README.md")]
diff --git a/tests/misc.rs b/tests/misc.rs
index 416e506..0db0606 100644
--- a/tests/misc.rs
+++ b/tests/misc.rs
@@ -1,3 +1,9 @@
+use std::{
+    fs::File,
+    io::{BufRead, BufReader},
+    process::Command,
+};
+
 use similar_asserts::assert_eq;
 use walkdir::{DirEntry, WalkDir};
 
@@ -45,3 +51,31 @@ fn is_source_file(entry: &DirEntry) -> bool {
 
     !filename.starts_with('.') // .git, etc.
 }
+
+#[test]
+fn example_output_in_readme() {
+    // Keeps the `output id=spans` block in README.md in sync with examples/spans.rs.
+    let output = Command::new("cargo")
+        .args(["run", "--example", "spans"])
+        .output()
+        .unwrap();
+    assert!(output.status.success(), "{}", String::from_utf8_lossy(&output.stderr));
+    // Strip trailing whitespace (per line and at the end) before comparing.
+    let expected = std::str::from_utf8(&output.stdout)
+        .unwrap()
+        .trim_end()
+        .lines()
+        .map(|s| s.trim_end().to_string())
+        .collect::<Vec<_>>()
+        .join("\n");
+    // Unwrap each line: `flatten()` would silently skip (or spin on) read errors.
+    let actual = BufReader::new(File::open("README.md").unwrap())
+        .lines()
+        .map(Result::unwrap)
+        .skip_while(|l| l != "```output id=spans")
+        .skip(1)
+        .take_while(|l| l != "```")
+        .collect::<Vec<_>>()
+        .join("\n");
+    assert_eq!(actual, expected);
+}
-- 
cgit v1.2.3