1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
|
#[macro_use]
extern crate criterion;
extern crate html5tokenizer;
use std::path::PathBuf;
use criterion::{black_box, Criterion};
use html5tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer};
/// Token sink used by the benchmarks: it accepts every token and discards it.
struct Sink;

impl TokenSink for Sink {
    type Handle = ();

    /// Swallows `token` and always asks the tokenizer to continue.
    fn process_token(
        &mut self,
        token: Token,
        _line_number: u64,
    ) -> TokenSinkResult<Self::Handle> {
        // The token's contents are irrelevant here. Passing it through
        // `black_box` keeps the optimizer from discarding the work that
        // produced it.
        let _ = black_box(token);
        TokenSinkResult::Continue
    }
}
fn run_bench(c: &mut Criterion, name: &str) {
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
path.push("data/bench/");
path.push(name);
let file_input: String = std::fs::read_to_string(&path).expect("can't open file");
let size = file_input.len();
let mut stream = file_input.chars().cycle();
// Break the input into chunks of 1024 chars (= a few kB).
// This simulates reading from the network.
let mut input = vec![];
let mut total = 0usize;
while total < size {
// The by_ref() call is important, otherwise we get wrong results!
// See rust-lang/rust#18045.
let sz = std::cmp::min(1024, size - total);
input.push(stream.by_ref().take(sz).collect::<String>());
total += sz;
}
let test_name = format!("html tokenizing {}", name);
c.bench_function(&test_name, move |b| {
b.iter(|| {
let mut tok = Tokenizer::new(Sink, Default::default());
let mut buffer = BufferQueue::new();
// We are doing clone inside the bench function, this is not ideal, but possibly
// necessary since our iterator consumes the underlying buffer.
for buf in input.clone().into_iter() {
buffer.push_back(buf);
let _ = tok.feed(&mut buffer);
}
let _ = tok.feed(&mut buffer);
tok.end();
})
});
}
/// Registers one tokenizer benchmark per sample document, in the order listed.
fn html5ever_benchmark(c: &mut Criterion) {
    // File names passed to `run_bench`, one benchmark each.
    const INPUT_FILES: [&str; 6] = [
        "lipsum.html",
        "lipsum-zh.html",
        "medium-fragment.html",
        "small-fragment.html",
        "tiny-fragment.html",
        "strong.html",
    ];

    for file_name in &INPUT_FILES {
        run_bench(c, file_name);
    }
}
// Define a benchmark group called `benches` whose only target is
// `html5ever_benchmark`.
criterion_group!(benches, html5ever_benchmark);
// Generate the `main` function that runs the `benches` group.
criterion_main!(benches);
|