diff options
| -rw-r--r-- | Cargo.toml | 12 | ||||
| -rw-r--r-- | README.md | 26 | ||||
| -rw-r--r-- | benches/html5ever.rs | 4 | ||||
| -rw-r--r-- | examples/noop-tokenize.rs | 4 | ||||
| -rw-r--r-- | examples/tokenize.rs | 8 | ||||
| -rw-r--r-- | src/lib.rs | 1 | 
6 files changed, 40 insertions, 15 deletions
| @@ -1,13 +1,13 @@  [package] -name = "html5ever" -version = "0.25.1" -authors = [ "The html5ever Project Developers" ] +name = "html5tokenizer" +version = "0.1.0" +authors = [ "The html5ever Project Developers", "Martin Fischer" ]  license = "MIT / Apache-2.0" -repository = "https://github.com/servo/html5ever" -description = "High-performance browser-grade HTML5 parser" -documentation = "https://docs.rs/html5ever" +repository = "https://git.push-f.com/html5tokenizer" +description = "The HTML5 tokenizer from html5ever repackaged with its dependencies removed"  categories = [ "parser-implementations", "web-programming" ] +keywords = ["html", "html5", "tokenizer", "parser"]  edition = "2018"  [dev-dependencies] diff --git a/README.md b/README.md new file mode 100644 index 0000000..531358e --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +# html5tokenizer + +This crate provides the tokenizer from [html5ever](https://crates.io/crates/html5ever), +repackaged with all of its dependencies removed. The following dependencies were removed: + +* [markup5ever](https://crates.io/crates/markup5ever)   +  `buffer_queue` and `smallcharset` were merged into the source code + +* [tendril](https://crates.io/crates/tendril)   +  According to its README it contains "a substantial amount of unsafe code". +  This fork replaces the tendril strings with plain old `std::string::String`s. + +* [mac](https://crates.io/crates/mac)   +  The only macros actually needed (`format_if` and `test_eq`) were merged into +  the source code. + +* [log](https://crates.io/crates/log)   +  Was only used for debug output. + +If you want to parse HTML into a tree (DOM) you should by all means use +html5ever; this crate is merely for those who only want an HTML5 tokenizer and +seek to minimize their compile dependencies (html5ever pulls in 56). + +## Credits + +Thanks to the developers of html5ever for their awesome parser! 
diff --git a/benches/html5ever.rs b/benches/html5ever.rs index 9f4b815..88a1d4b 100644 --- a/benches/html5ever.rs +++ b/benches/html5ever.rs @@ -1,12 +1,12 @@  #[macro_use]  extern crate criterion; -extern crate html5ever; +extern crate html5tokenizer;  use std::path::PathBuf;  use criterion::{black_box, Criterion}; -use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer}; +use html5tokenizer::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer};  struct Sink; diff --git a/examples/noop-tokenize.rs b/examples/noop-tokenize.rs index 323c429..9557965 100644 --- a/examples/noop-tokenize.rs +++ b/examples/noop-tokenize.rs @@ -9,12 +9,12 @@  // Run a single benchmark once.  For use with profiling tools. -extern crate html5ever; +extern crate html5tokenizer;  use std::default::Default;  use std::io; -use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer}; +use html5tokenizer::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer};  use io::Read;  struct Sink(Vec<Token>); diff --git a/examples/tokenize.rs b/examples/tokenize.rs index 943513a..e9b0013 100644 --- a/examples/tokenize.rs +++ b/examples/tokenize.rs @@ -7,14 +7,14 @@  // option. This file may not be copied, modified, or distributed  // except according to those terms. -extern crate html5ever; +extern crate html5tokenizer;  use std::default::Default;  use std::io; -use html5ever::tokenizer::BufferQueue; -use html5ever::tokenizer::{CharacterTokens, EndTag, NullCharacterToken, StartTag, TagToken}; -use html5ever::tokenizer::{ +use html5tokenizer::tokenizer::BufferQueue; +use html5tokenizer::tokenizer::{CharacterTokens, EndTag, NullCharacterToken, StartTag, TagToken}; +use html5tokenizer::tokenizer::{      ParseError, Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts,  };  use io::Read; @@ -7,7 +7,6 @@  // option. This file may not be copied, modified, or distributed  // except according to those terms. 
-#![crate_name = "html5ever"]  #![crate_type = "dylib"]  #![cfg_attr(test, deny(warnings))]  #![allow(unused_parens)] | 
