aboutsummaryrefslogtreecommitdiff
path: root/examples/arena.rs
diff options
context:
space:
mode:
Diffstat (limited to 'examples/arena.rs')
-rw-r--r--examples/arena.rs335
1 files changed, 335 insertions, 0 deletions
diff --git a/examples/arena.rs b/examples/arena.rs
new file mode 100644
index 0000000..1b59ae1
--- /dev/null
+++ b/examples/arena.rs
@@ -0,0 +1,335 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+extern crate html5ever;
+extern crate typed_arena;
+
+use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
+use html5ever::tendril::{StrTendril, TendrilSink};
+use html5ever::{parse_document, Attribute, ExpandedName, QualName};
+use std::borrow::Cow;
+use std::cell::{Cell, RefCell};
+use std::collections::HashSet;
+use std::io::{self, Read};
+use std::ptr;
+
+/// Reads all of standard input into memory and parses it into an arena-backed
+/// node tree.
+///
+/// The parsed tree is not used afterwards; it is dropped together with the
+/// arena when `main` returns.
+///
+/// # Panics
+///
+/// Panics with a descriptive message if standard input cannot be read.
+fn main() {
+    let mut bytes = Vec::new();
+    // `expect` instead of a bare `unwrap` so an I/O failure says what was being attempted.
+    io::stdin()
+        .read_to_end(&mut bytes)
+        .expect("failed to read HTML input from stdin");
+    let arena = typed_arena::Arena::new();
+    html5ever_parse_slice_into_arena(&bytes, &arena);
+}
+
+/// Parses `bytes` with `parse_document`, allocating every node in `arena`.
+///
+/// A fresh `Document` node is allocated up front and handed to the `Sink`;
+/// the value returned by the parser is whatever `Sink::finish` yields, which
+/// is that same document node.
+fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> {
+    let document: Ref<'a> = arena.alloc(Node::new(NodeData::Document));
+    let sink = Sink {
+        arena,
+        document,
+        quirks_mode: QuirksMode::NoQuirks,
+    };
+    // Default parser options; input is fed through the `from_utf8` adapter in one shot.
+    let parser = parse_document(sink, Default::default());
+    parser.from_utf8().one(bytes)
+}
+
+/// Shared reference to the `typed_arena::Arena` in which `Node` values are stored.
+type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>;
+
+/// Reference to a `Node` that is valid for the whole `'arena` lifetime.
+type Ref<'arena> = &'arena Node<'arena>;
+
+/// Optional node reference held in a `Cell`, so it can be replaced through a
+/// shared `&Node` (used for the parent/sibling/child pointers of `Node`).
+type Link<'arena> = Cell<Option<Ref<'arena>>>;
+
+/// `TreeSink` implementation that builds its tree out of arena-allocated nodes.
+struct Sink<'arena> {
+ // Arena from which every new node is allocated (see `new_node`).
+ arena: Arena<'arena>,
+ // Root document node; returned by `get_document` and `finish`.
+ document: Ref<'arena>,
+ // Last quirks mode passed to `set_quirks_mode`; starts as `NoQuirks`.
+ quirks_mode: QuirksMode,
+}
+
+/// A tree node whose links to its parent, siblings and children are plain
+/// references wrapped in `Cell`s, so the tree can be rewired through `&Node`.
+pub struct Node<'arena> {
+ // Parent node, or `None` when the node is detached (or is a root).
+ parent: Link<'arena>,
+ // Sibling that follows this node under the same parent, if any.
+ next_sibling: Link<'arena>,
+ // Sibling that precedes this node under the same parent, if any.
+ previous_sibling: Link<'arena>,
+ // First and last entries of this node's child list; both `None` when empty.
+ first_child: Link<'arena>,
+ last_child: Link<'arena>,
+ // Kind-specific payload of the node.
+ data: NodeData<'arena>,
+}
+
+/// Kind-specific payload stored in a `Node`.
+pub enum NodeData<'arena> {
+ /// A document root. Also used as the container node created for the
+ /// contents of a template element (see `create_element`).
+ Document,
+ /// A doctype, built by `append_doctype_to_document`.
+ Doctype {
+ name: StrTendril,
+ public_id: StrTendril,
+ system_id: StrTendril,
+ },
+ /// A text node. The contents sit in a `RefCell` so that text appended
+ /// right after an existing text node can be pushed onto it in place.
+ Text {
+ contents: RefCell<StrTendril>,
+ },
+ /// A comment, built by `create_comment`.
+ Comment {
+ contents: StrTendril,
+ },
+ /// An element.
+ Element {
+ name: QualName,
+ // In a `RefCell` so `add_attrs_if_missing` can extend the list via `&Node`.
+ attrs: RefCell<Vec<Attribute>>,
+ // `Some` only when the element was created with `flags.template` set.
+ template_contents: Option<Ref<'arena>>,
+ // Copied from `ElementFlags` when the element is created.
+ mathml_annotation_xml_integration_point: bool,
+ },
+ /// A processing instruction, built by `create_pi`.
+ ProcessingInstruction {
+ target: StrTendril,
+ contents: StrTendril,
+ },
+}
+
+impl<'arena> Node<'arena> {
+    /// Builds a node carrying `data` with no parent, no siblings and no children.
+    fn new(data: NodeData<'arena>) -> Self {
+        Node {
+            data,
+            parent: Cell::new(None),
+            first_child: Cell::new(None),
+            last_child: Cell::new(None),
+            previous_sibling: Cell::new(None),
+            next_sibling: Cell::new(None),
+        }
+    }
+
+    /// Unlinks this node from its parent and siblings; its own children stay attached.
+    ///
+    /// The former neighbours are linked to each other, and the former parent's
+    /// `first_child` / `last_child` are updated when this node sat at either end.
+    fn detach(&self) {
+        let parent = self.parent.take();
+        let before = self.previous_sibling.take();
+        let after = self.next_sibling.take();
+
+        // Repair the backward link that used to point at this node.
+        match (after, parent) {
+            (Some(sibling), _) => sibling.previous_sibling.set(before),
+            (None, Some(parent)) => parent.last_child.set(before),
+            (None, None) => {}
+        }
+
+        // Repair the forward link that used to point at this node.
+        match (before, parent) {
+            (Some(sibling), _) => sibling.next_sibling.set(after),
+            (None, Some(parent)) => parent.first_child.set(after),
+            (None, None) => {}
+        }
+    }
+
+    /// Makes `new_child` the last child of `self`, detaching it from wherever
+    /// it was linked before.
+    fn append(&'arena self, new_child: &'arena Self) {
+        new_child.detach();
+        new_child.parent.set(Some(self));
+        match self.last_child.take() {
+            Some(last_child) => {
+                // Non-empty child list: chain the new node after the old tail.
+                new_child.previous_sibling.set(Some(last_child));
+                debug_assert!(last_child.next_sibling.get().is_none());
+                last_child.next_sibling.set(Some(new_child));
+            }
+            None => {
+                // Empty child list: the new node is also the first child.
+                debug_assert!(self.first_child.get().is_none());
+                self.first_child.set(Some(new_child));
+            }
+        }
+        self.last_child.set(Some(new_child));
+    }
+
+    /// Links `new_sibling` immediately before `self`, under `self`'s parent
+    /// (if it has one), detaching it from wherever it was linked before.
+    fn insert_before(&'arena self, new_sibling: &'arena Self) {
+        // Detach first: if `new_sibling` is currently adjacent to `self`,
+        // this already rewrites `self`'s own links.
+        new_sibling.detach();
+        new_sibling.parent.set(self.parent.get());
+        new_sibling.next_sibling.set(Some(self));
+        match self.previous_sibling.take() {
+            Some(previous_sibling) => {
+                new_sibling.previous_sibling.set(Some(previous_sibling));
+                debug_assert!(ptr::eq::<Node>(
+                    previous_sibling.next_sibling.get().unwrap(),
+                    self
+                ));
+                previous_sibling.next_sibling.set(Some(new_sibling));
+            }
+            None => {
+                // `self` was the first child, so the parent's head pointer moves.
+                if let Some(parent) = self.parent.get() {
+                    debug_assert!(ptr::eq::<Node>(parent.first_child.get().unwrap(), self));
+                    parent.first_child.set(Some(new_sibling));
+                }
+            }
+        }
+        self.previous_sibling.set(Some(new_sibling));
+    }
+}
+
+impl<'arena> Sink<'arena> {
+    /// Allocates a new, unlinked node holding `data` in the sink's arena.
+    fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> {
+        let node = Node::new(data);
+        self.arena.alloc(node)
+    }
+
+    /// Shared insertion logic for `append` and `append_before_sibling`.
+    ///
+    /// * `previous` yields the node that would end up directly before the
+    ///   inserted child; it is only called when `child` is text.
+    /// * `append` performs the actual insertion of a node.
+    ///
+    /// When `child` is text and `previous` yields a text node, the text is
+    /// pushed onto that node and nothing is inserted. Otherwise a node (the
+    /// given one, or a freshly allocated text node) is passed to `append`.
+    fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A)
+    where
+        P: FnOnce() -> Option<Ref<'arena>>,
+        A: FnOnce(Ref<'arena>),
+    {
+        let text = match child {
+            // Existing nodes are inserted as they are.
+            NodeOrText::AppendNode(node) => return append(node),
+            NodeOrText::AppendText(text) => text,
+        };
+
+        // Merge into an adjacent text node if there is one.
+        if let Some(&NodeData::Text { ref contents }) = previous().map(|node| &node.data) {
+            contents.borrow_mut().push_tendril(&text);
+            return;
+        }
+
+        let text_node = self.new_node(NodeData::Text {
+            contents: RefCell::new(text),
+        });
+        append(text_node)
+    }
+}
+
+impl<'arena> TreeSink for Sink<'arena> {
+    type Handle = Ref<'arena>;
+    type Output = Ref<'arena>;
+
+    /// Consumes the sink and hands back the document node.
+    fn finish(self) -> Ref<'arena> {
+        let Sink { document, .. } = self;
+        document
+    }
+
+    /// Parse errors are ignored.
+    fn parse_error(&mut self, _: Cow<'static, str>) {}
+
+    /// Returns the document node.
+    fn get_document(&mut self) -> Ref<'arena> {
+        self.document
+    }
+
+    /// Records `mode` in the sink.
+    fn set_quirks_mode(&mut self, mode: QuirksMode) {
+        self.quirks_mode = mode;
+    }
+
+    /// Two handles are the same node exactly when they point at the same address.
+    fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool {
+        let (left, right): (&Node, &Node) = (*x, *y);
+        ptr::eq(left, right)
+    }
+
+    /// Returns the expanded name of an element.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `target` is not an element.
+    fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> {
+        if let NodeData::Element { ref name, .. } = target.data {
+            name.expanded()
+        } else {
+            panic!("not an element!")
+        }
+    }
+
+    /// Returns the node holding the contents of a template element.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `target` is not an element with template contents.
+    fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> {
+        match target.data {
+            NodeData::Element {
+                template_contents: Some(contents),
+                ..
+            } => contents,
+            _ => panic!("not a template element!"),
+        }
+    }
+
+    /// Returns the flag stored on the element when it was created.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `target` is not an element.
+    fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool {
+        match target.data {
+            NodeData::Element {
+                mathml_annotation_xml_integration_point: flag,
+                ..
+            } => flag,
+            _ => panic!("not an element!"),
+        }
+    }
+
+    /// Allocates an element node; when `flags.template` is set, a separate
+    /// `Document` node is allocated first to hold the template contents.
+    fn create_element(
+        &mut self,
+        name: QualName,
+        attrs: Vec<Attribute>,
+        flags: ElementFlags,
+    ) -> Ref<'arena> {
+        let template_contents = if flags.template {
+            Some(self.new_node(NodeData::Document))
+        } else {
+            None
+        };
+        let element = NodeData::Element {
+            name,
+            attrs: RefCell::new(attrs),
+            template_contents,
+            mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
+        };
+        self.new_node(element)
+    }
+
+    /// Allocates a comment node containing `text`.
+    fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> {
+        let comment = NodeData::Comment { contents: text };
+        self.new_node(comment)
+    }
+
+    /// Allocates a processing-instruction node.
+    fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
+        let instruction = NodeData::ProcessingInstruction {
+            target,
+            contents: data,
+        };
+        self.new_node(instruction)
+    }
+
+    /// Appends `child` as the last child of `parent`, merging text into a
+    /// trailing text node when possible.
+    fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
+        let parent: Ref<'arena> = *parent;
+        let trailing = || parent.last_child.get();
+        let attach = |new_node| parent.append(new_node);
+        self.append_common(child, trailing, attach)
+    }
+
+    /// Inserts `child` directly before `sibling`, merging text into the
+    /// preceding text node when possible.
+    fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
+        let sibling: Ref<'arena> = *sibling;
+        let preceding = || sibling.previous_sibling.get();
+        let attach = |new_node| sibling.insert_before(new_node);
+        self.append_common(child, preceding, attach)
+    }
+
+    /// Inserts `child` before `element` when `element` has a parent;
+    /// otherwise appends it to `prev_element`.
+    fn append_based_on_parent_node(
+        &mut self,
+        element: &Ref<'arena>,
+        prev_element: &Ref<'arena>,
+        child: NodeOrText<Ref<'arena>>,
+    ) {
+        match element.parent.get() {
+            Some(_) => self.append_before_sibling(element, child),
+            None => self.append(prev_element, child),
+        }
+    }
+
+    /// Allocates a doctype node and appends it to the document node.
+    fn append_doctype_to_document(
+        &mut self,
+        name: StrTendril,
+        public_id: StrTendril,
+        system_id: StrTendril,
+    ) {
+        let doctype = self.new_node(NodeData::Doctype {
+            name,
+            public_id,
+            system_id,
+        });
+        self.document.append(doctype)
+    }
+
+    /// Adds each attribute in `attrs` whose name was not already present on
+    /// `target` before this call.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `target` is not an element.
+    fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
+        let mut existing = match target.data {
+            NodeData::Element {
+                attrs: ref current, ..
+            } => current.borrow_mut(),
+            _ => panic!("not an element"),
+        };
+
+        // Snapshot the names up front; attributes added below are not part of the check.
+        let existing_names: HashSet<_> = existing.iter().map(|e| e.name.clone()).collect();
+        for attr in attrs {
+            if !existing_names.contains(&attr.name) {
+                existing.push(attr);
+            }
+        }
+    }
+
+    /// Detaches `target` from its parent and siblings.
+    fn remove_from_parent(&mut self, target: &Ref<'arena>) {
+        (*target).detach();
+    }
+
+    /// Moves every child of `node`, in order, to the end of `new_parent`'s children.
+    fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
+        let mut next_child = node.first_child.get();
+        loop {
+            let child = match next_child {
+                Some(child) => child,
+                None => break,
+            };
+            debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node));
+            // Read the successor before `append` detaches `child` and clears its links.
+            next_child = child.next_sibling.get();
+            new_parent.append(child);
+        }
+    }
+}