// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
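
//! Reads HTML from standard input and prints the token stream it produces:
//! start and end tags with their attributes, parse errors, and runs of
//! character tokens collapsed into a single quoted `CHAR` line.
//!
//! A typical invocation, assuming the file is built as a Cargo example named
//! `tokenize` (the example name is illustrative):
//!
//! ```text
//! echo '<p class="x">Hi</p>' | cargo run --example tokenize
//! ```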

extern crate html5tokenizer;

use std::default::Default;
use std::io::{self, Read};

use html5tokenizer::TagKind::{EndTag, StartTag};
use html5tokenizer::Token::{CharacterTokens, NullCharacterToken, ParseError, TagToken};
use html5tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts};

/// Prints the token stream to stdout, collapsing consecutive character
/// tokens into a single quoted `CHAR` run.
#[derive(Copy, Clone)]
struct TokenPrinter {
    in_char_run: bool,
}

impl TokenPrinter {
    /// Opens or closes the quoted character run when switching between
    /// character and non-character tokens.
    fn is_char(&mut self, is_char: bool) {
        match (self.in_char_run, is_char) {
            (false, true) => print!("CHAR : \""),
            (true, false) => println!("\""),
            _ => (),
        }
        self.in_char_run = is_char;
    }

    fn do_char(&mut self, c: char) {
        self.is_char(true);
        print!("{}", c.escape_default().collect::<String>());
    }
}

impl TokenSink for TokenPrinter {
    fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult {
        match token {
            CharacterTokens(b) => {
                for c in b.chars() {
                    self.do_char(c);
                }
            }
            NullCharacterToken => self.do_char('\0'),
            TagToken(tag) => {
                self.is_char(false);
                // This is not proper HTML serialization, of course.
                match tag.kind {
                    StartTag => print!("TAG  : <\x1b[32m{}\x1b[0m", tag.name),
                    EndTag => print!("TAG  : <\x1b[31m/{}\x1b[0m", tag.name),
                }
                for attr in tag.attrs.iter() {
                    print!(
                        " \x1b[36m{}\x1b[0m='\x1b[34m{}\x1b[0m'",
                        attr.name, attr.value
                    );
                }
                if tag.self_closing {
                    print!(" \x1b[31m/\x1b[0m");
                }
                println!(">");
            }
            ParseError { error, .. } => {
                self.is_char(false);
                println!("ERROR: {}", error);
            }
            _ => {
                self.is_char(false);
                println!("OTHER: {:?}", token);
            }
        }
        TokenSinkResult::Continue
    }
}

fn main() {
    let mut sink = TokenPrinter { in_char_run: false };

    // Read all of stdin up front and hand it to the tokenizer as one buffer.
    let mut chunk = Vec::new();
    io::stdin().read_to_end(&mut chunk).unwrap();
    let mut input = BufferQueue::new();
    input.push_back(std::str::from_utf8(&chunk).unwrap().to_string());

    let mut tok = Tokenizer::new(
        sink,
        TokenizerOpts {
            profile: true,
            ..Default::default()
        },
    );
    let _ = tok.feed(&mut input);
    assert!(input.is_empty());
    tok.end();

    // `TokenPrinter` is `Copy`, so `Tokenizer::new` took its own copy of `sink`;
    // this call flushes only the local copy's state.
    sink.is_char(false);
}