diff options
-rw-r--r-- | Cargo.toml | 12 | ||||
-rw-r--r-- | README.md | 26 | ||||
-rw-r--r-- | benches/html5ever.rs | 4 | ||||
-rw-r--r-- | examples/noop-tokenize.rs | 4 | ||||
-rw-r--r-- | examples/tokenize.rs | 8 | ||||
-rw-r--r-- | src/lib.rs | 1 |
6 files changed, 40 insertions, 15 deletions
@@ -1,13 +1,13 @@ [package] -name = "html5ever" -version = "0.25.1" -authors = [ "The html5ever Project Developers" ] +name = "html5tokenizer" +version = "0.1.0" +authors = [ "The html5ever Project Developers", "Martin Fischer" ] license = "MIT / Apache-2.0" -repository = "https://github.com/servo/html5ever" -description = "High-performance browser-grade HTML5 parser" -documentation = "https://docs.rs/html5ever" +repository = "https://git.push-f.com/html5tokenizer" +description = "The HTML5 tokenizer from html5ever repackaged with its dependencies removed" categories = [ "parser-implementations", "web-programming" ] +keywords = ["html", "html5", "tokenizer", "parser"] edition = "2018" [dev-dependencies] diff --git a/README.md b/README.md new file mode 100644 index 0000000..531358e --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +# html5tokenizer + +This crate provides the tokenizer from [html5ever](https://crates.io/crates/html5ever), +repackaged with all of its dependencies removed. The following dependencies were removed: + +* [markup5ever](https://crates.io/crates/markup5ever) + `buffer_queue` and `smallcharset` were merged into the source code + +* [tendril](https://crates.io/crates/tendril) + According to its README it contains "a substantial amount of unsafe code". + This fork replaces the tendril strings with plain old `std::string::String`s. + +* [mac](https://crates.io/crates/mac) + The only macros actually needed (`format_if` and `test_eq`) were merged into + the source code. + +* [log](https://crates.io/crates/log) + Was only used for debug output. + +If you want to parse HTML into a tree (DOM) you should by all means use +html5ever, this crate is merely for those who only want an HTML5 tokenizer and +seek to minimize their compile dependencies (html5ever pulls in 56). + +## Credits + +Thanks to the developers of html5ever for their awesome parser! 
diff --git a/benches/html5ever.rs b/benches/html5ever.rs index 9f4b815..88a1d4b 100644 --- a/benches/html5ever.rs +++ b/benches/html5ever.rs @@ -1,12 +1,12 @@ #[macro_use] extern crate criterion; -extern crate html5ever; +extern crate html5tokenizer; use std::path::PathBuf; use criterion::{black_box, Criterion}; -use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer}; +use html5tokenizer::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer}; struct Sink; diff --git a/examples/noop-tokenize.rs b/examples/noop-tokenize.rs index 323c429..9557965 100644 --- a/examples/noop-tokenize.rs +++ b/examples/noop-tokenize.rs @@ -9,12 +9,12 @@ // Run a single benchmark once. For use with profiling tools. -extern crate html5ever; +extern crate html5tokenizer; use std::default::Default; use std::io; -use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer}; +use html5tokenizer::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer}; use io::Read; struct Sink(Vec<Token>); diff --git a/examples/tokenize.rs b/examples/tokenize.rs index 943513a..e9b0013 100644 --- a/examples/tokenize.rs +++ b/examples/tokenize.rs @@ -7,14 +7,14 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -extern crate html5ever; +extern crate html5tokenizer; use std::default::Default; use std::io; -use html5ever::tokenizer::BufferQueue; -use html5ever::tokenizer::{CharacterTokens, EndTag, NullCharacterToken, StartTag, TagToken}; -use html5ever::tokenizer::{ +use html5tokenizer::tokenizer::BufferQueue; +use html5tokenizer::tokenizer::{CharacterTokens, EndTag, NullCharacterToken, StartTag, TagToken}; +use html5tokenizer::tokenizer::{ ParseError, Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts, }; use io::Read; @@ -7,7 +7,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-#![crate_name = "html5ever"] #![crate_type = "dylib"] #![cfg_attr(test, deny(warnings))] #![allow(unused_parens)] |