summaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
Diffstat (limited to 'examples')
-rw-r--r--examples/arena.rs335
-rw-r--r--examples/capi/tokenize.c74
-rw-r--r--examples/noop-tokenize.rs43
-rw-r--r--examples/noop-tree-builder.rs112
-rw-r--r--examples/print-tree-actions.rs177
-rw-r--r--examples/tokenize.rs103
6 files changed, 844 insertions, 0 deletions
diff --git a/examples/arena.rs b/examples/arena.rs
new file mode 100644
index 0000000..1b59ae1
--- /dev/null
+++ b/examples/arena.rs
@@ -0,0 +1,335 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+extern crate html5ever;
+extern crate typed_arena;
+
+use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
+use html5ever::tendril::{StrTendril, TendrilSink};
+use html5ever::{parse_document, Attribute, ExpandedName, QualName};
+use std::borrow::Cow;
+use std::cell::{Cell, RefCell};
+use std::collections::HashSet;
+use std::io::{self, Read};
+use std::ptr;
+
+/// Reads all of standard input into memory and parses it as an HTML document,
+/// allocating the resulting nodes in a local arena. The returned document
+/// reference is not used further.
+fn main() {
+    let mut input = Vec::new();
+    let stdin = io::stdin();
+    stdin.lock().read_to_end(&mut input).unwrap();
+    let node_arena = typed_arena::Arena::new();
+    html5ever_parse_slice_into_arena(&input, &node_arena);
+}
+
+/// Parses `bytes` as a UTF-8 HTML document, allocating every node in `arena`.
+///
+/// A `Document` node is allocated up front and handed to the sink; the value
+/// returned is whatever the sink's `finish` yields once parsing is complete.
+fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> {
+    let document: Ref<'a> = arena.alloc(Node::new(NodeData::Document));
+    let sink = Sink {
+        arena,
+        document,
+        quirks_mode: QuirksMode::NoQuirks,
+    };
+    let parser = parse_document(sink, Default::default()).from_utf8();
+    parser.one(bytes)
+}
+
+/// Shared reference to the arena that owns every `Node` of a tree.
+type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>;
+
+/// Reference to a node stored in the arena; this is the handle type the tree sink uses.
+type Ref<'arena> = &'arena Node<'arena>;
+
+/// Optional pointer to another node, held in a `Cell` so it can be rewritten through `&Node`.
+type Link<'arena> = Cell<Option<Ref<'arena>>>;
+
+/// Tree sink that builds the parsed tree out of arena-allocated nodes.
+struct Sink<'arena> {
+ // Arena in which `new_node` allocates every node.
+ arena: Arena<'arena>,
+ // Root document node; returned by `get_document` and `finish`.
+ document: Ref<'arena>,
+ // Last quirks mode reported through `set_quirks_mode`.
+ quirks_mode: QuirksMode,
+}
+
+/// A tree node. Parent, sibling and child pointers are `Cell`-wrapped options so the
+/// tree can be relinked through shared references.
+pub struct Node<'arena> {
+ // Node this one is attached under, if any.
+ parent: Link<'arena>,
+ // Sibling immediately after this node under the same parent.
+ next_sibling: Link<'arena>,
+ // Sibling immediately before this node under the same parent.
+ previous_sibling: Link<'arena>,
+ // First and last entries of this node's child list.
+ first_child: Link<'arena>,
+ last_child: Link<'arena>,
+ // Kind-specific payload.
+ data: NodeData<'arena>,
+}
+
+/// Payload carried by a `Node`, one variant per kind of node the sink creates.
+pub enum NodeData<'arena> {
+ // Root of a document; also used for the contents node of a template element.
+ Document,
+ // Doctype with the three strings passed to `append_doctype_to_document`.
+ Doctype {
+ name: StrTendril,
+ public_id: StrTendril,
+ system_id: StrTendril,
+ },
+ // Text node; the `RefCell` lets adjacent text be appended in place.
+ Text {
+ contents: RefCell<StrTendril>,
+ },
+ Comment {
+ contents: StrTendril,
+ },
+ Element {
+ name: QualName,
+ // Mutable so `add_attrs_if_missing` can extend the list after creation.
+ attrs: RefCell<Vec<Attribute>>,
+ // `Some` only when the element was created with the `template` flag set.
+ template_contents: Option<Ref<'arena>>,
+ // Copied from the element flags at creation time.
+ mathml_annotation_xml_integration_point: bool,
+ },
+ ProcessingInstruction {
+ target: StrTendril,
+ contents: StrTendril,
+ },
+}
+
+impl<'arena> Node<'arena> {
+ /// Creates an unlinked node (no parent, siblings or children) holding `data`.
+ fn new(data: NodeData<'arena>) -> Self {
+ Node {
+ parent: Cell::new(None),
+ previous_sibling: Cell::new(None),
+ next_sibling: Cell::new(None),
+ first_child: Cell::new(None),
+ last_child: Cell::new(None),
+ data: data,
+ }
+ }
+
+ /// Unlinks this node from its parent and siblings. Its own children are left attached.
+ fn detach(&self) {
+ // `take` clears this node's own links while handing back their old values.
+ let parent = self.parent.take();
+ let previous_sibling = self.previous_sibling.take();
+ let next_sibling = self.next_sibling.take();
+
+ // Fix the backward link: either the next sibling, or (if this node was last)
+ // the parent's `last_child`.
+ if let Some(next_sibling) = next_sibling {
+ next_sibling.previous_sibling.set(previous_sibling);
+ } else if let Some(parent) = parent {
+ parent.last_child.set(previous_sibling);
+ }
+
+ // Fix the forward link: either the previous sibling, or (if this node was first)
+ // the parent's `first_child`.
+ if let Some(previous_sibling) = previous_sibling {
+ previous_sibling.next_sibling.set(next_sibling);
+ } else if let Some(parent) = parent {
+ parent.first_child.set(next_sibling);
+ }
+ }
+
+ /// Makes `new_child` the last child of this node, detaching it from wherever it was first.
+ fn append(&'arena self, new_child: &'arena Self) {
+ new_child.detach();
+ new_child.parent.set(Some(self));
+ if let Some(last_child) = self.last_child.take() {
+ new_child.previous_sibling.set(Some(last_child));
+ debug_assert!(last_child.next_sibling.get().is_none());
+ last_child.next_sibling.set(Some(new_child));
+ } else {
+ // No children yet: the new child is also the first child.
+ debug_assert!(self.first_child.get().is_none());
+ self.first_child.set(Some(new_child));
+ }
+ self.last_child.set(Some(new_child));
+ }
+
+ /// Inserts `new_sibling` immediately before this node, detaching it from wherever it
+ /// was first. The new sibling receives this node's parent (possibly `None`).
+ fn insert_before(&'arena self, new_sibling: &'arena Self) {
+ new_sibling.detach();
+ new_sibling.parent.set(self.parent.get());
+ new_sibling.next_sibling.set(Some(self));
+ if let Some(previous_sibling) = self.previous_sibling.take() {
+ new_sibling.previous_sibling.set(Some(previous_sibling));
+ debug_assert!(ptr::eq::<Node>(
+ previous_sibling.next_sibling.get().unwrap(),
+ self
+ ));
+ previous_sibling.next_sibling.set(Some(new_sibling));
+ } else if let Some(parent) = self.parent.get() {
+ // This node was the first child, so the new sibling takes over that role.
+ debug_assert!(ptr::eq::<Node>(parent.first_child.get().unwrap(), self));
+ parent.first_child.set(Some(new_sibling));
+ }
+ self.previous_sibling.set(Some(new_sibling));
+ }
+}
+
+impl<'arena> Sink<'arena> {
+    /// Allocates a fresh, unlinked node carrying `data` in the sink's arena.
+    fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> {
+        let node = Node::new(data);
+        self.arena.alloc(node)
+    }
+
+    /// Shared insertion logic for `append` and `append_before_sibling`.
+    ///
+    /// For text, `previous` is asked for the node that would precede the insertion
+    /// point; when that node is a text node the new text is merged into it and
+    /// `append` is never called. In every other case `append` receives the node to
+    /// insert (a newly allocated text node, or the node passed in).
+    fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A)
+    where
+        P: FnOnce() -> Option<Ref<'arena>>,
+        A: FnOnce(Ref<'arena>),
+    {
+        let to_insert = match child {
+            NodeOrText::AppendNode(node) => node,
+            NodeOrText::AppendText(text) => {
+                if let Some(neighbour) = previous() {
+                    if let NodeData::Text { ref contents } = neighbour.data {
+                        // Extend the existing text node instead of creating a new one.
+                        contents.borrow_mut().push_tendril(&text);
+                        return;
+                    }
+                }
+                let data = NodeData::Text {
+                    contents: RefCell::new(text),
+                };
+                self.new_node(data)
+            },
+        };
+
+        append(to_insert)
+    }
+}
+
+/// `TreeSink` implementation that materialises the parsed tree as arena nodes.
+impl<'arena> TreeSink for Sink<'arena> {
+ type Handle = Ref<'arena>;
+ type Output = Ref<'arena>;
+
+ /// Returns the document node as the parse result.
+ fn finish(self) -> Ref<'arena> {
+ self.document
+ }
+
+ /// Parse errors are ignored by this example.
+ fn parse_error(&mut self, _: Cow<'static, str>) {}
+
+ fn get_document(&mut self) -> Ref<'arena> {
+ self.document
+ }
+
+ /// Records the quirks mode on the sink.
+ fn set_quirks_mode(&mut self, mode: QuirksMode) {
+ self.quirks_mode = mode;
+ }
+
+ /// Two handles denote the same node exactly when they point at the same address.
+ fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool {
+ ptr::eq::<Node>(*x, *y)
+ }
+
+ /// Returns the expanded name of an element. Panics if `target` is not an element.
+ fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> {
+ match target.data {
+ NodeData::Element { ref name, .. } => name.expanded(),
+ _ => panic!("not an element!"),
+ }
+ }
+
+ /// Returns the contents node of a template element. Panics if `target` is not an
+ /// element that was created with template contents.
+ fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> {
+ if let NodeData::Element {
+ template_contents: Some(ref contents),
+ ..
+ } = target.data
+ {
+ contents
+ } else {
+ panic!("not a template element!")
+ }
+ }
+
+ /// Returns the flag stored at element creation. Panics if `target` is not an element.
+ fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool {
+ if let NodeData::Element {
+ mathml_annotation_xml_integration_point,
+ ..
+ } = target.data
+ {
+ mathml_annotation_xml_integration_point
+ } else {
+ panic!("not an element!")
+ }
+ }
+
+ /// Allocates an element node. When `flags.template` is set, a separate `Document`
+ /// node is allocated to hold the template's contents.
+ fn create_element(
+ &mut self,
+ name: QualName,
+ attrs: Vec<Attribute>,
+ flags: ElementFlags,
+ ) -> Ref<'arena> {
+ self.new_node(NodeData::Element {
+ name,
+ attrs: RefCell::new(attrs),
+ template_contents: if flags.template {
+ Some(self.new_node(NodeData::Document))
+ } else {
+ None
+ },
+ mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
+ })
+ }
+
+ fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> {
+ self.new_node(NodeData::Comment { contents: text })
+ }
+
+ fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
+ self.new_node(NodeData::ProcessingInstruction {
+ target: target,
+ contents: data,
+ })
+ }
+
+ /// Appends `child` as the last child of `parent`; text is merged into a trailing
+ /// text node when `parent` already ends with one.
+ fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
+ self.append_common(
+ child,
+ || parent.last_child.get(),
+ |new_node| parent.append(new_node),
+ )
+ }
+
+ /// Inserts `child` immediately before `sibling`; text is merged into the preceding
+ /// sibling when that sibling is a text node.
+ fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
+ self.append_common(
+ child,
+ || sibling.previous_sibling.get(),
+ |new_node| sibling.insert_before(new_node),
+ )
+ }
+
+ /// Inserts `child` before `element` when `element` has a parent; otherwise appends
+ /// it to `prev_element`.
+ fn append_based_on_parent_node(
+ &mut self,
+ element: &Ref<'arena>,
+ prev_element: &Ref<'arena>,
+ child: NodeOrText<Ref<'arena>>,
+ ) {
+ if element.parent.get().is_some() {
+ self.append_before_sibling(element, child)
+ } else {
+ self.append(prev_element, child)
+ }
+ }
+
+ /// Allocates a doctype node and appends it to the document node.
+ fn append_doctype_to_document(
+ &mut self,
+ name: StrTendril,
+ public_id: StrTendril,
+ system_id: StrTendril,
+ ) {
+ self.document.append(self.new_node(NodeData::Doctype {
+ name,
+ public_id,
+ system_id,
+ }))
+ }
+
+ /// Adds each attribute in `attrs` whose name the element does not already carry.
+ /// Panics if `target` is not an element.
+ fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
+ let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
+ attrs.borrow_mut()
+ } else {
+ panic!("not an element")
+ };
+
+ // The name set is a snapshot taken before extending, so names are only compared
+ // against attributes that were present on entry.
+ let existing_names = existing
+ .iter()
+ .map(|e| e.name.clone())
+ .collect::<HashSet<_>>();
+ existing.extend(
+ attrs
+ .into_iter()
+ .filter(|attr| !existing_names.contains(&attr.name)),
+ );
+ }
+
+ fn remove_from_parent(&mut self, target: &Ref<'arena>) {
+ target.detach()
+ }
+
+ /// Moves every child of `node` to the end of `new_parent`'s child list, in order.
+ fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
+ let mut next_child = node.first_child.get();
+ while let Some(child) = next_child {
+ debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node));
+ // Read the successor first: `append` detaches `child`, which clears its links.
+ next_child = child.next_sibling.get();
+ new_parent.append(child)
+ }
+ }
+}
diff --git a/examples/capi/tokenize.c b/examples/capi/tokenize.c
new file mode 100644
index 0000000..8c8cdd4
--- /dev/null
+++ b/examples/capi/tokenize.c
@@ -0,0 +1,74 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#include <stdio.h>
+
+#include "html5ever.h"
+
+// Writes a NUL-terminated string to stdout without adding a newline.
+void put_str(const char *str) {
+    fputs(str, stdout);
+}
+
+// Writes the `len` bytes starting at `data` of the buffer to stdout as one item.
+void put_buf(struct h5e_buf buf) {
+    fwrite(buf.data, buf.len, 1, stdout);
+}
+
+// Character-data callback: prints the text on one line behind a "CHARS" label.
+// `user` is not used.
+void do_chars(void *user, struct h5e_buf text) {
+    fputs("CHARS : ", stdout);
+    put_buf(text);
+    fputs("\n", stdout);
+}
+
+// Start-tag callback: prints the tag name in angle brackets, followed by '/'
+// when the tag is self-closing. `user` and `num_attrs` are not used.
+void do_start_tag(void *user, struct h5e_buf name, int self_closing, size_t num_attrs) {
+    fputs("TAG : <", stdout);
+    put_buf(name);
+    if (self_closing != 0)
+        fputc('/', stdout);
+    fputs(">\n", stdout);
+}
+
+// Attribute callback: prints one `name="value"` line. The value is written
+// verbatim, without escaping. `user` is not used.
+void do_tag_attr(void *user, struct h5e_buf name, struct h5e_buf value) {
+    fputs(" ATTR: ", stdout);
+    put_buf(name);
+    fputs("=\"", stdout);
+    put_buf(value);
+    fputs("\"\n", stdout);
+}
+
+// End-tag callback: prints the tag name as `</name>`. `user` is not used.
+void do_end_tag(void *user, struct h5e_buf name) {
+    fputs("TAG : </", stdout);
+    put_buf(name);
+    fputs(">\n", stdout);
+}
+
+// Callback table wiring the printing functions above into the tokenizer.
+// Members not named here are zero-initialized by the designated initializer.
+struct h5e_token_ops ops = {
+ .do_chars = do_chars,
+ .do_start_tag = do_start_tag,
+ .do_tag_attr = do_tag_attr,
+ .do_end_tag = do_end_tag,
+};
+
+// Token sink handed to the tokenizer: the callback table plus a user pointer,
+// which is NULL because none of the callbacks use it.
+struct h5e_token_sink sink = {
+ .ops = &ops,
+ .user = NULL,
+};
+
+// Tokenizes the HTML fragment given as the first command-line argument and
+// prints one line per token through the callbacks in `ops`.
+//
+// Returns 0 on success, or 1 (after printing a usage message to stderr) when
+// no fragment was supplied.
+int main(int argc, char *argv[]) {
+    if (argc < 2) {
+        // Diagnostics belong on stderr so they never mix with token output.
+        // argv[0] may be NULL when argc is 0, so fall back to a fixed name.
+        fprintf(stderr, "Usage: %s 'HTML fragment'\n", argc > 0 ? argv[0] : "tokenize");
+        return 1;
+    }
+
+    struct h5e_tokenizer *tok = h5e_tokenizer_new(&sink);
+    h5e_tokenizer_feed(tok, h5e_buf_from_cstr(argv[1]));
+    h5e_tokenizer_end(tok);
+    h5e_tokenizer_free(tok);
+    return 0;
+}
diff --git a/examples/noop-tokenize.rs b/examples/noop-tokenize.rs
new file mode 100644
index 0000000..d6c62f1
--- /dev/null
+++ b/examples/noop-tokenize.rs
@@ -0,0 +1,43 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Run a single benchmark once. For use with profiling tools.
+
+extern crate html5ever;
+
+use std::default::Default;
+use std::io;
+
+use html5ever::tendril::*;
+use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer};
+
+/// Token sink that simply stores every token it is given.
+struct Sink(Vec<Token>);
+
+impl TokenSink for Sink {
+    type Handle = ();
+
+    /// Stores the token and tells the tokenizer to carry on.
+    fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
+        // The token is never inspected; keeping it stops the tokenizer's work
+        // from being optimized away.
+        let Sink(ref mut collected) = *self;
+        collected.push(token);
+        TokenSinkResult::Continue
+    }
+}
+
+/// Reads standard input, reinterprets it as text and runs the tokenizer over it once,
+/// collecting the tokens in a `Sink`.
+fn main() {
+    let mut raw = ByteTendril::new();
+    io::stdin().read_to_tendril(&mut raw).unwrap();
+
+    let mut queue = BufferQueue::new();
+    let text = raw.try_reinterpret().unwrap();
+    queue.push_back(text);
+
+    let collector = Sink(Vec::new());
+    let mut tokenizer = Tokenizer::new(collector, Default::default());
+    let _ = tokenizer.feed(&mut queue);
+    // All input must have been consumed before the tokenizer is finished.
+    assert!(queue.is_empty());
+    tokenizer.end();
+}
diff --git a/examples/noop-tree-builder.rs b/examples/noop-tree-builder.rs
new file mode 100644
index 0000000..0775449
--- /dev/null
+++ b/examples/noop-tree-builder.rs
@@ -0,0 +1,112 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#[macro_use]
+extern crate html5ever;
+
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::default::Default;
+use std::io;
+
+use html5ever::parse_document;
+use html5ever::tendril::*;
+use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
+use html5ever::{Attribute, ExpandedName, QualName};
+
+/// Tree sink that builds no tree: nodes are plain integer handles.
+struct Sink {
+ // Next handle to hand out; `get_id` advances it by two each time.
+ next_id: usize,
+ // Qualified name of every element created so far, keyed by its handle.
+ names: HashMap<usize, QualName>,
+}
+
+impl Sink {
+    /// Returns the current handle and moves the counter on by two, leaving the
+    /// handle in between unused by this allocator.
+    fn get_id(&mut self) -> usize {
+        let allocated = self.next_id;
+        self.next_id = allocated + 2;
+        allocated
+    }
+}
+
+/// `TreeSink` that discards every tree mutation. It only tracks element names,
+/// because the tree builder asks for them back.
+impl TreeSink for Sink {
+    type Handle = usize;
+    type Output = Self;
+
+    /// Hands the sink itself back as the parse output.
+    fn finish(self) -> Self {
+        self
+    }
+
+    /// The document always has handle 0.
+    fn get_document(&mut self) -> usize {
+        0
+    }
+
+    /// Returns the handle used for a template's contents, which is the element's
+    /// own handle plus one. Panics unless `target` is an HTML `template` element.
+    fn get_template_contents(&mut self, target: &usize) -> usize {
+        // `target` is already a reference, so it is passed to `get` as-is.
+        if let Some(expanded_name!(html "template")) = self.names.get(target).map(|n| n.expanded())
+        {
+            target + 1
+        } else {
+            panic!("not a template element")
+        }
+    }
+
+    fn same_node(&self, x: &usize, y: &usize) -> bool {
+        x == y
+    }
+
+    /// Looks up the name recorded by `create_element`. Panics if `target` is not an element.
+    fn elem_name(&self, target: &usize) -> ExpandedName {
+        self.names.get(target).expect("not an element").expanded()
+    }
+
+    /// Allocates a handle and remembers the element's name; attributes and flags are dropped.
+    fn create_element(&mut self, name: QualName, _: Vec<Attribute>, _: ElementFlags) -> usize {
+        let id = self.get_id();
+        self.names.insert(id, name);
+        id
+    }
+
+    fn create_comment(&mut self, _text: StrTendril) -> usize {
+        self.get_id()
+    }
+
+    // The parameters are unused only because this method is a stub.
+    #[allow(unused_variables)]
+    fn create_pi(&mut self, target: StrTendril, value: StrTendril) -> usize {
+        unimplemented!()
+    }
+
+    fn append_before_sibling(&mut self, _sibling: &usize, _new_node: NodeOrText<usize>) {}
+
+    fn append_based_on_parent_node(
+        &mut self,
+        _element: &usize,
+        _prev_element: &usize,
+        _new_node: NodeOrText<usize>,
+    ) {
+    }
+
+    fn parse_error(&mut self, _msg: Cow<'static, str>) {}
+    fn set_quirks_mode(&mut self, _mode: QuirksMode) {}
+    fn append(&mut self, _parent: &usize, _child: NodeOrText<usize>) {}
+
+    fn append_doctype_to_document(&mut self, _: StrTendril, _: StrTendril, _: StrTendril) {}
+
+    /// Only checks that `target` is a known element; the attributes are dropped.
+    fn add_attrs_if_missing(&mut self, target: &usize, _attrs: Vec<Attribute>) {
+        assert!(self.names.contains_key(target), "not an element");
+    }
+
+    fn remove_from_parent(&mut self, _target: &usize) {}
+    fn reparent_children(&mut self, _node: &usize, _new_parent: &usize) {}
+    fn mark_script_already_started(&mut self, _node: &usize) {}
+}
+
+/// Parses standard input as a UTF-8 HTML document with the do-nothing sink.
+fn main() {
+    let stdin = io::stdin();
+    // Handle 0 is the document, so element handles start at 1.
+    let sink = Sink {
+        names: HashMap::new(),
+        next_id: 1,
+    };
+    let parser = parse_document(sink, Default::default()).from_utf8();
+    parser.read_from(&mut stdin.lock()).unwrap();
+}
diff --git a/examples/print-tree-actions.rs b/examples/print-tree-actions.rs
new file mode 100644
index 0000000..dbb6c6e
--- /dev/null
+++ b/examples/print-tree-actions.rs
@@ -0,0 +1,177 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#[macro_use]
+extern crate html5ever;
+
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::default::Default;
+use std::io;
+
+use html5ever::parse_document;
+use html5ever::tendril::*;
+use html5ever::tree_builder::{
+ AppendNode, AppendText, ElementFlags, NodeOrText, QuirksMode, TreeSink,
+};
+use html5ever::{Attribute, ExpandedName, QualName};
+
+/// Tree sink that prints each tree-building action instead of building a tree.
+struct Sink {
+ // Next handle to hand out; `get_id` advances it by two each time.
+ next_id: usize,
+ // Qualified name of every element created so far, keyed by its handle.
+ names: HashMap<usize, QualName>,
+}
+
+impl Sink {
+    /// Hands out the current handle and steps the counter by two; the skipped
+    /// value is what `get_template_contents` returns for a template element.
+    fn get_id(&mut self) -> usize {
+        let handle = self.next_id;
+        self.next_id = handle + 2;
+        handle
+    }
+}
+
+/// `TreeSink` that reports every callback on stdout. Only element names are
+/// stored, so that name lookups from the tree builder can be answered.
+impl TreeSink for Sink {
+    type Handle = usize;
+    type Output = Self;
+
+    /// Hands the sink itself back as the parse output.
+    fn finish(self) -> Self {
+        self
+    }
+
+    fn parse_error(&mut self, msg: Cow<'static, str>) {
+        println!("Parse error: {}", msg);
+    }
+
+    /// The document always has handle 0.
+    fn get_document(&mut self) -> usize {
+        0
+    }
+
+    /// Returns the element's handle plus one as the handle of its template contents.
+    /// Panics unless `target` is an HTML `template` element.
+    fn get_template_contents(&mut self, target: &usize) -> usize {
+        match self.names.get(target).map(|n| n.expanded()) {
+            Some(expanded_name!(html "template")) => target + 1,
+            _ => panic!("not a template element"),
+        }
+    }
+
+    fn set_quirks_mode(&mut self, mode: QuirksMode) {
+        println!("Set quirks mode to {:?}", mode);
+    }
+
+    fn same_node(&self, x: &usize, y: &usize) -> bool {
+        x == y
+    }
+
+    /// Looks up the name recorded by `create_element`. Panics if `target` is not an element.
+    fn elem_name(&self, target: &usize) -> ExpandedName {
+        let name = self.names.get(target).expect("not an element");
+        name.expanded()
+    }
+
+    /// Allocates a handle, reports it and remembers the element's name.
+    fn create_element(&mut self, name: QualName, _: Vec<Attribute>, _: ElementFlags) -> usize {
+        let handle = self.get_id();
+        println!("Created {:?} as {}", name, handle);
+        self.names.insert(handle, name);
+        handle
+    }
+
+    fn create_comment(&mut self, text: StrTendril) -> usize {
+        let handle = self.get_id();
+        let escaped = escape_default(&text);
+        println!("Created comment \"{}\" as {}", escaped, handle);
+        handle
+    }
+
+    // The parameters are unused only because this method is a stub.
+    #[allow(unused_variables)]
+    fn create_pi(&mut self, target: StrTendril, value: StrTendril) -> usize {
+        unimplemented!()
+    }
+
+    fn append(&mut self, parent: &usize, child: NodeOrText<usize>) {
+        match child {
+            AppendText(t) => println!("Append text to {}: \"{}\"", parent, escape_default(&t)),
+            AppendNode(n) => println!("Append node {} to {}", n, parent),
+        }
+    }
+
+    fn append_before_sibling(&mut self, sibling: &usize, new_node: NodeOrText<usize>) {
+        match new_node {
+            AppendText(t) => println!("Append text before {}: \"{}\"", sibling, escape_default(&t)),
+            AppendNode(n) => println!("Append node {} before {}", n, sibling),
+        }
+    }
+
+    /// Always reports the insertion as happening before `element`; `_prev_element`
+    /// is never consulted.
+    fn append_based_on_parent_node(
+        &mut self,
+        element: &Self::Handle,
+        _prev_element: &Self::Handle,
+        child: NodeOrText<Self::Handle>,
+    ) {
+        self.append_before_sibling(element, child);
+    }
+
+    fn append_doctype_to_document(
+        &mut self,
+        name: StrTendril,
+        public_id: StrTendril,
+        system_id: StrTendril,
+    ) {
+        println!("Append doctype: {} {} {}", name, public_id, system_id);
+    }
+
+    /// Prints every attribute it is given; nothing is stored. Panics if `target`
+    /// is not a known element.
+    fn add_attrs_if_missing(&mut self, target: &usize, attrs: Vec<Attribute>) {
+        assert!(self.names.contains_key(target), "not an element");
+        println!("Add missing attributes to {}:", target);
+        for attr in attrs {
+            println!(" {:?} = {}", attr.name, attr.value);
+        }
+    }
+
+    fn associate_with_form(
+        &mut self,
+        _target: &usize,
+        _form: &usize,
+        _nodes: (&usize, Option<&usize>),
+    ) {
+        // Form owners are not tracked by this sink.
+    }
+
+    fn remove_from_parent(&mut self, target: &usize) {
+        println!("Remove {} from parent", target);
+    }
+
+    fn reparent_children(&mut self, node: &usize, new_parent: &usize) {
+        println!("Move children from {} to {}", node, new_parent);
+    }
+
+    fn mark_script_already_started(&mut self, node: &usize) {
+        println!("Mark script {} as already started", node);
+    }
+
+    fn set_current_line(&mut self, line_number: u64) {
+        println!("Set current line to {}", line_number);
+    }
+
+    fn pop(&mut self, elem: &usize) {
+        println!("Popped element {}", elem);
+    }
+}
+
+// FIXME: Copy of str::escape_default from std, which is currently unstable
+/// Returns `s` with every character replaced by its `char::escape_default` form.
+pub fn escape_default(s: &str) -> String {
+    let mut escaped = String::new();
+    for ch in s.chars() {
+        escaped.extend(ch.escape_default());
+    }
+    escaped
+}
+
+/// Parses standard input as a UTF-8 HTML document, printing each tree-builder action.
+fn main() {
+    let stdin = io::stdin();
+    // Handle 0 is the document, so element handles start at 1.
+    let sink = Sink {
+        names: HashMap::new(),
+        next_id: 1,
+    };
+    let parser = parse_document(sink, Default::default()).from_utf8();
+    parser.read_from(&mut stdin.lock()).unwrap();
+}
diff --git a/examples/tokenize.rs b/examples/tokenize.rs
new file mode 100644
index 0000000..039ffb7
--- /dev/null
+++ b/examples/tokenize.rs
@@ -0,0 +1,103 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+extern crate html5ever;
+
+use std::default::Default;
+use std::io;
+
+use html5ever::tendril::*;
+use html5ever::tokenizer::BufferQueue;
+use html5ever::tokenizer::{CharacterTokens, EndTag, NullCharacterToken, StartTag, TagToken};
+use html5ever::tokenizer::{
+ ParseError, Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts,
+};
+
+/// Token sink that pretty-prints tokens to stdout.
+// NOTE(review): because this type is `Copy`, passing it by value leaves the caller
+// with an independent copy whose `in_char_run` is not updated by the callee.
+#[derive(Copy, Clone)]
+struct TokenPrinter {
+ // True while a `CHAR : "` line has been opened and not yet closed.
+ in_char_run: bool,
+}
+
+impl TokenPrinter {
+    /// Records whether the next output is character data, opening a quoted `CHAR`
+    /// line when a run starts and closing the quote (with a newline) when it ends.
+    fn is_char(&mut self, is_char: bool) {
+        let was_in_run = self.in_char_run;
+        if is_char && !was_in_run {
+            print!("CHAR : \"");
+        } else if was_in_run && !is_char {
+            println!("\"");
+        }
+        self.in_char_run = is_char;
+    }
+
+    /// Prints one character in escaped form as part of the current character run.
+    fn do_char(&mut self, c: char) {
+        self.is_char(true);
+        let escaped: String = c.escape_default().collect();
+        print!("{}", escaped);
+    }
+}
+
+impl TokenSink for TokenPrinter {
+    type Handle = ();
+
+    /// Prints one token. Character tokens extend the current `CHAR` line; every
+    /// other token first closes any open character run and then gets its own line.
+    fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
+        match token {
+            NullCharacterToken => self.do_char('\0'),
+            CharacterTokens(text) => text.chars().for_each(|c| self.do_char(c)),
+            ParseError(err) => {
+                self.is_char(false);
+                println!("ERROR: {}", err);
+            },
+            TagToken(tag) => {
+                self.is_char(false);
+                // The output is colourised with ANSI escapes; it is not valid HTML.
+                match tag.kind {
+                    EndTag => print!("TAG : <\x1b[31m/{}\x1b[0m", tag.name),
+                    StartTag => print!("TAG : <\x1b[32m{}\x1b[0m", tag.name),
+                }
+                for attr in &tag.attrs {
+                    print!(
+                        " \x1b[36m{}\x1b[0m='\x1b[34m{}\x1b[0m'",
+                        attr.name.local, attr.value
+                    );
+                }
+                if tag.self_closing {
+                    print!(" \x1b[31m/\x1b[0m");
+                }
+                println!(">");
+            },
+            _ => {
+                self.is_char(false);
+                println!("OTHER: {:?}", token);
+            },
+        }
+        TokenSinkResult::Continue
+    }
+}
+
+/// Reads standard input, tokenizes it with profiling enabled and prints every token
+/// through a `TokenPrinter`.
+fn main() {
+    let sink = TokenPrinter { in_char_run: false };
+    let mut chunk = ByteTendril::new();
+    io::stdin().read_to_tendril(&mut chunk).unwrap();
+    let mut input = BufferQueue::new();
+    input.push_back(chunk.try_reinterpret().unwrap());
+
+    let mut tok = Tokenizer::new(
+        sink,
+        TokenizerOpts {
+            profile: true,
+            ..Default::default()
+        },
+    );
+    let _ = tok.feed(&mut input);
+    assert!(input.is_empty());
+    tok.end();
+    // Close a character run left open at end of input. This must go through the
+    // tokenizer's own sink: `TokenPrinter` is `Copy`, so the local `sink` is a
+    // separate copy that never saw any tokens.
+    tok.sink.is_char(false);
+}