diff options
Diffstat (limited to 'examples')
| -rw-r--r-- | examples/arena.rs | 335 | ||||
| -rw-r--r-- | examples/capi/tokenize.c | 74 | ||||
| -rw-r--r-- | examples/noop-tokenize.rs | 43 | ||||
| -rw-r--r-- | examples/noop-tree-builder.rs | 112 | ||||
| -rw-r--r-- | examples/print-tree-actions.rs | 177 | ||||
| -rw-r--r-- | examples/tokenize.rs | 103 | 
6 files changed, 844 insertions, 0 deletions
| diff --git a/examples/arena.rs b/examples/arena.rs new file mode 100644 index 0000000..1b59ae1 --- /dev/null +++ b/examples/arena.rs @@ -0,0 +1,335 @@ +// Copyright 2014-2017 The html5ever Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +extern crate html5ever; +extern crate typed_arena; + +use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; +use html5ever::tendril::{StrTendril, TendrilSink}; +use html5ever::{parse_document, Attribute, ExpandedName, QualName}; +use std::borrow::Cow; +use std::cell::{Cell, RefCell}; +use std::collections::HashSet; +use std::io::{self, Read}; +use std::ptr; + +fn main() { +    let mut bytes = Vec::new(); +    io::stdin().read_to_end(&mut bytes).unwrap(); +    let arena = typed_arena::Arena::new(); +    html5ever_parse_slice_into_arena(&bytes, &arena); +} + +fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> { +    let sink = Sink { +        arena: arena, +        document: arena.alloc(Node::new(NodeData::Document)), +        quirks_mode: QuirksMode::NoQuirks, +    }; +    parse_document(sink, Default::default()) +        .from_utf8() +        .one(bytes) +} + +type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>; + +type Ref<'arena> = &'arena Node<'arena>; + +type Link<'arena> = Cell<Option<Ref<'arena>>>; + +struct Sink<'arena> { +    arena: Arena<'arena>, +    document: Ref<'arena>, +    quirks_mode: QuirksMode, +} + +pub struct Node<'arena> { +    parent: Link<'arena>, +    next_sibling: Link<'arena>, +    previous_sibling: Link<'arena>, +    first_child: Link<'arena>, +    last_child: Link<'arena>, +    data: NodeData<'arena>, +} + +pub enum NodeData<'arena> { +    Document, +    Doctype { +        name: StrTendril, +        public_id: StrTendril, +        system_id: StrTendril, +    }, +    Text { +        contents: RefCell<StrTendril>, +    }, +    Comment { +        contents: StrTendril, +    }, +    Element { +        name: QualName, +        attrs: RefCell<Vec<Attribute>>, +        template_contents: Option<Ref<'arena>>, +        mathml_annotation_xml_integration_point: bool, +    }, +    ProcessingInstruction { +        target: StrTendril, +        contents: StrTendril, +    }, +} + +impl<'arena> Node<'arena> { +    fn new(data: NodeData<'arena>) -> Self { +        Node { +            parent: Cell::new(None), +            previous_sibling: Cell::new(None), +            next_sibling: Cell::new(None), +            first_child: Cell::new(None), +            last_child: Cell::new(None), +            data: data, +        } +    } + +    fn detach(&self) { +        let parent = self.parent.take(); +        let previous_sibling = self.previous_sibling.take(); +        let next_sibling = self.next_sibling.take(); + +        if let Some(next_sibling) = next_sibling { +            next_sibling.previous_sibling.set(previous_sibling); +        } else if let Some(parent) = parent { +            parent.last_child.set(previous_sibling); +        } + +        if let Some(previous_sibling) = previous_sibling { +            previous_sibling.next_sibling.set(next_sibling); +        } else if let Some(parent) = parent { +            parent.first_child.set(next_sibling); +        } +    } + +    fn append(&'arena self, new_child: &'arena Self) { +        new_child.detach(); +        new_child.parent.set(Some(self)); +        if let Some(last_child) = self.last_child.take() { +            new_child.previous_sibling.set(Some(last_child)); +            debug_assert!(last_child.next_sibling.get().is_none()); +            last_child.next_sibling.set(Some(new_child)); +        } else { +            debug_assert!(self.first_child.get().is_none()); +            self.first_child.set(Some(new_child)); +        } +        self.last_child.set(Some(new_child)); +    } + +    fn insert_before(&'arena self, new_sibling: &'arena Self) { +        new_sibling.detach(); +        new_sibling.parent.set(self.parent.get()); +        new_sibling.next_sibling.set(Some(self)); +        if let Some(previous_sibling) = self.previous_sibling.take() { +            new_sibling.previous_sibling.set(Some(previous_sibling)); +            debug_assert!(ptr::eq::<Node>( +                previous_sibling.next_sibling.get().unwrap(), +                self +            )); +            previous_sibling.next_sibling.set(Some(new_sibling)); +        } else if let Some(parent) = self.parent.get() { +            debug_assert!(ptr::eq::<Node>(parent.first_child.get().unwrap(), self)); +            parent.first_child.set(Some(new_sibling)); +        } +        self.previous_sibling.set(Some(new_sibling)); +    } +} + +impl<'arena> Sink<'arena> { +    fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> { +        self.arena.alloc(Node::new(data)) +    } + +    fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A) +    where +        P: FnOnce() -> Option<Ref<'arena>>, +        A: FnOnce(Ref<'arena>), +    { +        let new_node = match child { +            NodeOrText::AppendText(text) => { +                // Append to an existing Text node if we have one. +                if let Some(&Node { +                    data: NodeData::Text { ref contents }, +                    .. +                }) = previous() +                { +                    contents.borrow_mut().push_tendril(&text); +                    return; +                } +                self.new_node(NodeData::Text { +                    contents: RefCell::new(text), +                }) +            }, +            NodeOrText::AppendNode(node) => node, +        }; + +        append(new_node) +    } +} + +impl<'arena> TreeSink for Sink<'arena> { +    type Handle = Ref<'arena>; +    type Output = Ref<'arena>; + +    fn finish(self) -> Ref<'arena> { +        self.document +    } + +    fn parse_error(&mut self, _: Cow<'static, str>) {} + +    fn get_document(&mut self) -> Ref<'arena> { +        self.document +    } + +    fn set_quirks_mode(&mut self, mode: QuirksMode) { +        self.quirks_mode = mode; +    } + +    fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool { +        ptr::eq::<Node>(*x, *y) +    } + +    fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> { +        match target.data { +            NodeData::Element { ref name, .. } => name.expanded(), +            _ => panic!("not an element!"), +        } +    } + +    fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> { +        if let NodeData::Element { +            template_contents: Some(ref contents), +            .. +        } = target.data +        { +            contents +        } else { +            panic!("not a template element!") +        } +    } + +    fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool { +        if let NodeData::Element { +            mathml_annotation_xml_integration_point, +            .. +        } = target.data +        { +            mathml_annotation_xml_integration_point +        } else { +            panic!("not an element!") +        } +    } + +    fn create_element( +        &mut self, +        name: QualName, +        attrs: Vec<Attribute>, +        flags: ElementFlags, +    ) -> Ref<'arena> { +        self.new_node(NodeData::Element { +            name, +            attrs: RefCell::new(attrs), +            template_contents: if flags.template { +                Some(self.new_node(NodeData::Document)) +            } else { +                None +            }, +            mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point, +        }) +    } + +    fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> { +        self.new_node(NodeData::Comment { contents: text }) +    } + +    fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> { +        self.new_node(NodeData::ProcessingInstruction { +            target: target, +            contents: data, +        }) +    } + +    fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { +        self.append_common( +            child, +            || parent.last_child.get(), +            |new_node| parent.append(new_node), +        ) +    } + +    fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { +        self.append_common( +            child, +            || sibling.previous_sibling.get(), +            |new_node| sibling.insert_before(new_node), +        ) +    } + +    fn append_based_on_parent_node( +        &mut self, +        element: &Ref<'arena>, +        prev_element: &Ref<'arena>, +        child: NodeOrText<Ref<'arena>>, +    ) { +        if element.parent.get().is_some() { +            self.append_before_sibling(element, child) +        } else { +            self.append(prev_element, child) +        } +    } + +    fn append_doctype_to_document( +        &mut self, +        name: StrTendril, +        public_id: StrTendril, +        system_id: StrTendril, +    ) { +        self.document.append(self.new_node(NodeData::Doctype { +            name, +            public_id, +            system_id, +        })) +    } + +    fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) { +        let mut existing = if let NodeData::Element { ref attrs, .. } = target.data { +            attrs.borrow_mut() +        } else { +            panic!("not an element") +        }; + +        let existing_names = existing +            .iter() +            .map(|e| e.name.clone()) +            .collect::<HashSet<_>>(); +        existing.extend( +            attrs +                .into_iter() +                .filter(|attr| !existing_names.contains(&attr.name)), +        ); +    } + +    fn remove_from_parent(&mut self, target: &Ref<'arena>) { +        target.detach() +    } + +    fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) { +        let mut next_child = node.first_child.get(); +        while let Some(child) = next_child { +            debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node)); +            next_child = child.next_sibling.get(); +            new_parent.append(child) +        } +    } +} diff --git a/examples/capi/tokenize.c b/examples/capi/tokenize.c new file mode 100644 index 0000000..8c8cdd4 --- /dev/null +++ b/examples/capi/tokenize.c @@ -0,0 +1,74 @@ +// Copyright 2014-2017 The html5ever Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#include <stdio.h> + +#include "html5ever.h" + +void put_str(const char *x) { +    fputs(x, stdout); +} + +void put_buf(struct h5e_buf text) { +    fwrite(text.data, text.len, 1, stdout); +} + +void do_chars(void *user, struct h5e_buf text) { +    put_str("CHARS : "); +    put_buf(text); +    put_str("\n"); +} + +void do_start_tag(void *user, struct h5e_buf name, int self_closing, size_t num_attrs) { +    put_str("TAG   : <"); +    put_buf(name); +    if (self_closing) { +        putchar('/'); +    } +    put_str(">\n"); +} + +void do_tag_attr(void *user, struct h5e_buf name, struct h5e_buf value) { +    put_str("  ATTR: "); +    put_buf(name); +    put_str("=\""); +    put_buf(value); +    put_str("\"\n"); +} + +void do_end_tag(void *user, struct h5e_buf name) { +    put_str("TAG   : </"); +    put_buf(name); +    put_str(">\n"); +} + +struct h5e_token_ops ops = { +    .do_chars = do_chars, +    .do_start_tag = do_start_tag, +    .do_tag_attr = do_tag_attr, +    .do_end_tag = do_end_tag, +}; + +struct h5e_token_sink sink = { +    .ops = &ops, +    .user = NULL, +}; + +int main(int argc, char *argv[]) { +    if (argc < 2) { +        printf("Usage: %s 'HTML fragment'\n", argv[0]); +        return 1; +    } + +    struct h5e_tokenizer *tok = h5e_tokenizer_new(&sink); +    h5e_tokenizer_feed(tok, h5e_buf_from_cstr(argv[1])); +    h5e_tokenizer_end(tok); +    h5e_tokenizer_free(tok); +    return 0; +} diff --git a/examples/noop-tokenize.rs b/examples/noop-tokenize.rs new file mode 100644 index 0000000..d6c62f1 --- /dev/null +++ b/examples/noop-tokenize.rs @@ -0,0 +1,43 @@ +// Copyright 2014-2017 The html5ever Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// Run a single benchmark once.  For use with profiling tools. + +extern crate html5ever; + +use std::default::Default; +use std::io; + +use html5ever::tendril::*; +use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer}; + +struct Sink(Vec<Token>); + +impl TokenSink for Sink { +    type Handle = (); + +    fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> { +        // Don't use the token, but make sure we don't get +        // optimized out entirely. +        self.0.push(token); +        TokenSinkResult::Continue +    } +} + +fn main() { +    let mut chunk = ByteTendril::new(); +    io::stdin().read_to_tendril(&mut chunk).unwrap(); +    let mut input = BufferQueue::new(); +    input.push_back(chunk.try_reinterpret().unwrap()); + +    let mut tok = Tokenizer::new(Sink(Vec::new()), Default::default()); +    let _ = tok.feed(&mut input); +    assert!(input.is_empty()); +    tok.end(); +} diff --git a/examples/noop-tree-builder.rs b/examples/noop-tree-builder.rs new file mode 100644 index 0000000..0775449 --- /dev/null +++ b/examples/noop-tree-builder.rs @@ -0,0 +1,112 @@ +// Copyright 2014-2017 The html5ever Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#[macro_use] +extern crate html5ever; + +use std::borrow::Cow; +use std::collections::HashMap; +use std::default::Default; +use std::io; + +use html5ever::parse_document; +use html5ever::tendril::*; +use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; +use html5ever::{Attribute, ExpandedName, QualName}; + +struct Sink { +    next_id: usize, +    names: HashMap<usize, QualName>, +} + +impl Sink { +    fn get_id(&mut self) -> usize { +        let id = self.next_id; +        self.next_id += 2; +        id +    } +} + +impl TreeSink for Sink { +    type Handle = usize; +    type Output = Self; +    fn finish(self) -> Self { +        self +    } + +    fn get_document(&mut self) -> usize { +        0 +    } + +    fn get_template_contents(&mut self, target: &usize) -> usize { +        if let Some(expanded_name!(html "template")) = self.names.get(&target).map(|n| n.expanded()) +        { +            target + 1 +        } else { +            panic!("not a template element") +        } +    } + +    fn same_node(&self, x: &usize, y: &usize) -> bool { +        x == y +    } + +    fn elem_name(&self, target: &usize) -> ExpandedName { +        self.names.get(target).expect("not an element").expanded() +    } + +    fn create_element(&mut self, name: QualName, _: Vec<Attribute>, _: ElementFlags) -> usize { +        let id = self.get_id(); +        self.names.insert(id, name); +        id +    } + +    fn create_comment(&mut self, _text: StrTendril) -> usize { +        self.get_id() +    } + +    #[allow(unused_variables)] +    fn create_pi(&mut self, target: StrTendril, value: StrTendril) -> usize { +        unimplemented!() +    } + +    fn append_before_sibling(&mut self, _sibling: &usize, _new_node: NodeOrText<usize>) {} + +    fn append_based_on_parent_node( +        &mut self, +        _element: &usize, +        _prev_element: &usize, +        _new_node: NodeOrText<usize>, +    ) { +    } + +    fn parse_error(&mut self, _msg: Cow<'static, str>) {} +    fn set_quirks_mode(&mut self, _mode: QuirksMode) {} +    fn append(&mut self, _parent: &usize, _child: NodeOrText<usize>) {} + +    fn append_doctype_to_document(&mut self, _: StrTendril, _: StrTendril, _: StrTendril) {} +    fn add_attrs_if_missing(&mut self, target: &usize, _attrs: Vec<Attribute>) { +        assert!(self.names.contains_key(&target), "not an element"); +    } +    fn remove_from_parent(&mut self, _target: &usize) {} +    fn reparent_children(&mut self, _node: &usize, _new_parent: &usize) {} +    fn mark_script_already_started(&mut self, _node: &usize) {} +} + +fn main() { +    let sink = Sink { +        next_id: 1, +        names: HashMap::new(), +    }; +    let stdin = io::stdin(); +    parse_document(sink, Default::default()) +        .from_utf8() +        .read_from(&mut stdin.lock()) +        .unwrap(); +} diff --git a/examples/print-tree-actions.rs b/examples/print-tree-actions.rs new file mode 100644 index 0000000..dbb6c6e --- /dev/null +++ b/examples/print-tree-actions.rs @@ -0,0 +1,177 @@ +// Copyright 2014-2017 The html5ever Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#[macro_use] +extern crate html5ever; + +use std::borrow::Cow; +use std::collections::HashMap; +use std::default::Default; +use std::io; + +use html5ever::parse_document; +use html5ever::tendril::*; +use html5ever::tree_builder::{ +    AppendNode, AppendText, ElementFlags, NodeOrText, QuirksMode, TreeSink, +}; +use html5ever::{Attribute, ExpandedName, QualName}; + +struct Sink { +    next_id: usize, +    names: HashMap<usize, QualName>, +} + +impl Sink { +    fn get_id(&mut self) -> usize { +        let id = self.next_id; +        self.next_id += 2; +        id +    } +} + +impl TreeSink for Sink { +    type Handle = usize; +    type Output = Self; +    fn finish(self) -> Self { +        self +    } + +    fn parse_error(&mut self, msg: Cow<'static, str>) { +        println!("Parse error: {}", msg); +    } + +    fn get_document(&mut self) -> usize { +        0 +    } + +    fn get_template_contents(&mut self, target: &usize) -> usize { +        if let Some(expanded_name!(html "template")) = self.names.get(target).map(|n| n.expanded()) +        { +            target + 1 +        } else { +            panic!("not a template element") +        } +    } + +    fn set_quirks_mode(&mut self, mode: QuirksMode) { +        println!("Set quirks mode to {:?}", mode); +    } + +    fn same_node(&self, x: &usize, y: &usize) -> bool { +        x == y +    } + +    fn elem_name(&self, target: &usize) -> ExpandedName { +        self.names.get(target).expect("not an element").expanded() +    } + +    fn create_element(&mut self, name: QualName, _: Vec<Attribute>, _: ElementFlags) -> usize { +        let id = self.get_id(); +        println!("Created {:?} as {}", name, id); +        self.names.insert(id, name); +        id +    } + +    fn create_comment(&mut self, text: StrTendril) -> usize { +        let id = self.get_id(); +        println!("Created comment \"{}\" as {}", escape_default(&text), id); +        id +    } + +    #[allow(unused_variables)] +    fn create_pi(&mut self, target: StrTendril, value: StrTendril) -> usize { +        unimplemented!() +    } + +    fn append(&mut self, parent: &usize, child: NodeOrText<usize>) { +        match child { +            AppendNode(n) => println!("Append node {} to {}", n, parent), +            AppendText(t) => println!("Append text to {}: \"{}\"", parent, escape_default(&t)), +        } +    } + +    fn append_before_sibling(&mut self, sibling: &usize, new_node: NodeOrText<usize>) { +        match new_node { +            AppendNode(n) => println!("Append node {} before {}", n, sibling), +            AppendText(t) => println!("Append text before {}: \"{}\"", sibling, escape_default(&t)), +        } +    } + +    fn append_based_on_parent_node( +        &mut self, +        element: &Self::Handle, +        _prev_element: &Self::Handle, +        child: NodeOrText<Self::Handle>, +    ) { +        self.append_before_sibling(element, child); +    } + +    fn append_doctype_to_document( +        &mut self, +        name: StrTendril, +        public_id: StrTendril, +        system_id: StrTendril, +    ) { +        println!("Append doctype: {} {} {}", name, public_id, system_id); +    } + +    fn add_attrs_if_missing(&mut self, target: &usize, attrs: Vec<Attribute>) { +        assert!(self.names.contains_key(target), "not an element"); +        println!("Add missing attributes to {}:", target); +        for attr in attrs.into_iter() { +            println!("    {:?} = {}", attr.name, attr.value); +        } +    } + +    fn associate_with_form( +        &mut self, +        _target: &usize, +        _form: &usize, +        _nodes: (&usize, Option<&usize>), +    ) { +        // No form owner support. +    } + +    fn remove_from_parent(&mut self, target: &usize) { +        println!("Remove {} from parent", target); +    } + +    fn reparent_children(&mut self, node: &usize, new_parent: &usize) { +        println!("Move children from {} to {}", node, new_parent); +    } + +    fn mark_script_already_started(&mut self, node: &usize) { +        println!("Mark script {} as already started", node); +    } + +    fn set_current_line(&mut self, line_number: u64) { +        println!("Set current line to {}", line_number); +    } + +    fn pop(&mut self, elem: &usize) { +        println!("Popped element {}", elem); +    } +} + +// FIXME: Copy of str::escape_default from std, which is currently unstable +pub fn escape_default(s: &str) -> String { +    s.chars().flat_map(|c| c.escape_default()).collect() +} + +fn main() { +    let sink = Sink { +        next_id: 1, +        names: HashMap::new(), +    }; +    let stdin = io::stdin(); +    parse_document(sink, Default::default()) +        .from_utf8() +        .read_from(&mut stdin.lock()) +        .unwrap(); +} diff --git a/examples/tokenize.rs b/examples/tokenize.rs new file mode 100644 index 0000000..039ffb7 --- /dev/null +++ b/examples/tokenize.rs @@ -0,0 +1,103 @@ +// Copyright 2014-2017 The html5ever Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +extern crate html5ever; + +use std::default::Default; +use std::io; + +use html5ever::tendril::*; +use html5ever::tokenizer::BufferQueue; +use html5ever::tokenizer::{CharacterTokens, EndTag, NullCharacterToken, StartTag, TagToken}; +use html5ever::tokenizer::{ +    ParseError, Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts, +}; + +#[derive(Copy, Clone)] +struct TokenPrinter { +    in_char_run: bool, +} + +impl TokenPrinter { +    fn is_char(&mut self, is_char: bool) { +        match (self.in_char_run, is_char) { +            (false, true) => print!("CHAR : \""), +            (true, false) => println!("\""), +            _ => (), +        } +        self.in_char_run = is_char; +    } + +    fn do_char(&mut self, c: char) { +        self.is_char(true); +        print!("{}", c.escape_default().collect::<String>()); +    } +} + +impl TokenSink for TokenPrinter { +    type Handle = (); + +    fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> { +        match token { +            CharacterTokens(b) => { +                for c in b.chars() { +                    self.do_char(c); +                } +            }, +            NullCharacterToken => self.do_char('\0'), +            TagToken(tag) => { +                self.is_char(false); +                // This is not proper HTML serialization, of course. +                match tag.kind { +                    StartTag => print!("TAG  : <\x1b[32m{}\x1b[0m", tag.name), +                    EndTag => print!("TAG  : <\x1b[31m/{}\x1b[0m", tag.name), +                } +                for attr in tag.attrs.iter() { +                    print!( +                        " \x1b[36m{}\x1b[0m='\x1b[34m{}\x1b[0m'", +                        attr.name.local, attr.value +                    ); +                } +                if tag.self_closing { +                    print!(" \x1b[31m/\x1b[0m"); +                } +                println!(">"); +            }, +            ParseError(err) => { +                self.is_char(false); +                println!("ERROR: {}", err); +            }, +            _ => { +                self.is_char(false); +                println!("OTHER: {:?}", token); +            }, +        } +        TokenSinkResult::Continue +    } +} + +fn main() { +    let mut sink = TokenPrinter { in_char_run: false }; +    let mut chunk = ByteTendril::new(); +    io::stdin().read_to_tendril(&mut chunk).unwrap(); +    let mut input = BufferQueue::new(); +    input.push_back(chunk.try_reinterpret().unwrap()); + +    let mut tok = Tokenizer::new( +        sink, +        TokenizerOpts { +            profile: true, +            ..Default::default() +        }, +    ); +    let _ = tok.feed(&mut input); +    assert!(input.is_empty()); +    tok.end(); +    sink.is_char(false); +} | 
