diff options
author | Martin Fischer <martin@push-f.com> | 2023-08-18 16:54:43 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2023-08-19 13:41:55 +0200 |
commit | 4682f104ea24fc257c22dc12db1a3dad1323662a (patch) | |
tree | 8815652555207038cc0837f25b9ef35068bbc16a | |
parent | 0c495ba984436cccc6caeed66639a2b61095dbad (diff) |
docs: link multipage version of HTML spec
-rw-r--r-- | Cargo.toml | 1 | ||||
-rw-r--r-- | README.md | 6 | ||||
-rw-r--r-- | src/emitter.rs | 2 | ||||
-rw-r--r-- | src/error.rs | 2 | ||||
-rw-r--r-- | src/tokenizer.rs | 16 | ||||
-rw-r--r-- | tests/misc.rs | 47 |
6 files changed, 61 insertions, 13 deletions
@@ -21,6 +21,7 @@ include = ["src/**/*", "LICENSE", "README.md"] codespan-reporting = "0.11.1" insta = "1.31.0" similar-asserts = { workspace = true } +walkdir = "2.3.3" [features] # Feature used by integration tests in tests/ to get access to library internals. @@ -69,7 +69,7 @@ Licensed under the MIT license, see [`./LICENSE`](./LICENSE). [html5gum]: https://crates.io/crates/html5gum -[tokenization]: https://html.spec.whatwg.org/#tokenization +[tokenization]: https://html.spec.whatwg.org/multipage/parsing.html#tokenization [html5lib's tokenizer test suite]: https://github.com/html5lib/html5lib-tests/tree/master/tokenizer -[charset detection]: https://html.spec.whatwg.org/#determining-the-character-encoding -[misnested tags]: https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser +[charset detection]: https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding +[misnested tags]: https://html.spec.whatwg.org/multipage/parsing.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser diff --git a/src/emitter.rs b/src/emitter.rs index 4fc2159..d1180a5 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -521,7 +521,7 @@ impl<O: Offset> Comment<O> { pub struct Doctype<O> { /// The [force-quirks flag]. /// - /// [force-quirks flag]: https://html.spec.whatwg.org/#force-quirks-flag + /// [force-quirks flag]: https://html.spec.whatwg.org/multipage/parsing.html#force-quirks-flag pub force_quirks: bool, /// The doctype's name. For HTML documents this is "html". diff --git a/src/error.rs b/src/error.rs index 401937b..3ba8f63 100644 --- a/src/error.rs +++ b/src/error.rs @@ -4,7 +4,7 @@ macro_rules! impl_error { )*) => { /// All [parse errors] this tokenizer can emit. /// - /// [parse errors]: https://html.spec.whatwg.org/#parse-errors + /// [parse errors]: https://html.spec.whatwg.org/multipage/parsing.html#parse-errors #[derive(Debug, Eq, PartialEq)] pub enum Error { $( diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 3a6fb32..469cbd1 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -76,31 +76,31 @@ impl<R: Reader, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> { pub enum State { /// The [data state]. /// - /// [data state]: https://html.spec.whatwg.org/#data-state + /// [data state]: https://html.spec.whatwg.org/multipage/parsing.html#data-state Data, /// The [PLAINTEXT state]. /// - /// [PLAINTEXT state]: https://html.spec.whatwg.org/#plaintext-state + /// [PLAINTEXT state]: https://html.spec.whatwg.org/multipage/parsing.html#plaintext-state PlainText, /// The [RCDATA state]. /// - /// [RCDATA state]: https://html.spec.whatwg.org/#rcdata-state + /// [RCDATA state]: https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state RcData, /// The [RAWTEXT state]. /// - /// [RAWTEXT state]: https://html.spec.whatwg.org/#rawtext-state + /// [RAWTEXT state]: https://html.spec.whatwg.org/multipage/parsing.html#rawtext-state RawText, /// The [script data state]. /// - /// [script data state]: https://html.spec.whatwg.org/#script-data-state + /// [script data state]: https://html.spec.whatwg.org/multipage/parsing.html#script-data-state ScriptData, /// The [script data escaped state]. /// - /// [script data escaped state]: https://html.spec.whatwg.org/#script-data-escaped-state + /// [script data escaped state]: https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-state ScriptDataEscaped, /// The [script data double escaped state]. /// - /// [script data double escaped state]: https://html.spec.whatwg.org/#script-data-double-escaped-state + /// [script data double escaped state]: https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-state ScriptDataDoubleEscaped, } @@ -145,7 +145,7 @@ impl<R: Reader + Position<O>, O: Offset, E: Emitter<O>> Tokenizer<R, O, E> { /// /// See also WHATWG's definition of [appropriate end tag token]. /// - /// [appropriate end tag token]: https://html.spec.whatwg.org/#appropriate-end-tag-token + /// [appropriate end tag token]: https://html.spec.whatwg.org/multipage/parsing.html#appropriate-end-tag-token #[inline] pub(crate) fn current_end_tag_is_appropriate(&mut self) -> bool { self.current_tag_name == self.last_start_tag_name diff --git a/tests/misc.rs b/tests/misc.rs new file mode 100644 index 0000000..416e506 --- /dev/null +++ b/tests/misc.rs @@ -0,0 +1,47 @@ +use similar_asserts::assert_eq; +use walkdir::{DirEntry, WalkDir}; + +#[test] +fn links_to_html_spec_use_multipage_version() { + for entry in WalkDir::new(".") + .min_depth(1) + .into_iter() + .filter_entry(is_source_file) + .flatten() + { + if !entry.file_type().is_file() { + continue; + } + + let actual = match std::fs::read_to_string(entry.path()) { + Ok(content) => content, + Err(err) => panic!("invalid UTF-8 in file content: {:?}: {}", entry.path(), err), + }; + + let expected = actual.replace( + concat!("://html.spec.whatwg.org/", "#"), + concat!("://html.spec.whatwg.org/multipage/???.html#"), + ); + + assert_eq!( + actual, + expected, + "Found a link to the one-page version of the HTML spec, which is huge and takes long to load. We want to link the multipage version instead." + ); + } +} + +fn is_source_file(entry: &DirEntry) -> bool { + let Some(filename) = entry.file_name().to_str() else { + panic!("invalid UTF-8 in filename: {:?}", entry.path()) + }; + + if entry.depth() == 1 && filename == "target" || filename == "Cargo.lock" { + return false; // cargo files + } + if filename == "html5lib-tests" { + return false; // git submodule + } + + !filename.starts_with('.') // .git, etc. +} |