diff options
Diffstat (limited to 'examples/arena.rs')
-rw-r--r-- | examples/arena.rs | 335 |
1 files changed, 335 insertions, 0 deletions
diff --git a/examples/arena.rs b/examples/arena.rs new file mode 100644 index 0000000..1b59ae1 --- /dev/null +++ b/examples/arena.rs @@ -0,0 +1,335 @@ +// Copyright 2014-2017 The html5ever Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +extern crate html5ever; +extern crate typed_arena; + +use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; +use html5ever::tendril::{StrTendril, TendrilSink}; +use html5ever::{parse_document, Attribute, ExpandedName, QualName}; +use std::borrow::Cow; +use std::cell::{Cell, RefCell}; +use std::collections::HashSet; +use std::io::{self, Read}; +use std::ptr; + +fn main() { + let mut bytes = Vec::new(); + io::stdin().read_to_end(&mut bytes).unwrap(); + let arena = typed_arena::Arena::new(); + html5ever_parse_slice_into_arena(&bytes, &arena); +} + +fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> { + let sink = Sink { + arena: arena, + document: arena.alloc(Node::new(NodeData::Document)), + quirks_mode: QuirksMode::NoQuirks, + }; + parse_document(sink, Default::default()) + .from_utf8() + .one(bytes) +} + +type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>; + +type Ref<'arena> = &'arena Node<'arena>; + +type Link<'arena> = Cell<Option<Ref<'arena>>>; + +struct Sink<'arena> { + arena: Arena<'arena>, + document: Ref<'arena>, + quirks_mode: QuirksMode, +} + +pub struct Node<'arena> { + parent: Link<'arena>, + next_sibling: Link<'arena>, + previous_sibling: Link<'arena>, + first_child: Link<'arena>, + last_child: Link<'arena>, + data: NodeData<'arena>, +} + +pub enum NodeData<'arena> { + Document, + Doctype { + name: StrTendril, + public_id: StrTendril, + system_id: StrTendril, + }, + Text { + contents: RefCell<StrTendril>, + }, + Comment { + contents: StrTendril, + }, + Element { + name: QualName, + attrs: RefCell<Vec<Attribute>>, + template_contents: Option<Ref<'arena>>, + mathml_annotation_xml_integration_point: bool, + }, + ProcessingInstruction { + target: StrTendril, + contents: StrTendril, + }, +} + +impl<'arena> Node<'arena> { + fn new(data: NodeData<'arena>) -> Self { + Node { + parent: Cell::new(None), + previous_sibling: Cell::new(None), + next_sibling: Cell::new(None), + first_child: Cell::new(None), + last_child: Cell::new(None), + data: data, + } + } + + fn detach(&self) { + let parent = self.parent.take(); + let previous_sibling = self.previous_sibling.take(); + let next_sibling = self.next_sibling.take(); + + if let Some(next_sibling) = next_sibling { + next_sibling.previous_sibling.set(previous_sibling); + } else if let Some(parent) = parent { + parent.last_child.set(previous_sibling); + } + + if let Some(previous_sibling) = previous_sibling { + previous_sibling.next_sibling.set(next_sibling); + } else if let Some(parent) = parent { + parent.first_child.set(next_sibling); + } + } + + fn append(&'arena self, new_child: &'arena Self) { + new_child.detach(); + new_child.parent.set(Some(self)); + if let Some(last_child) = self.last_child.take() { + new_child.previous_sibling.set(Some(last_child)); + debug_assert!(last_child.next_sibling.get().is_none()); + last_child.next_sibling.set(Some(new_child)); + } else { + debug_assert!(self.first_child.get().is_none()); + self.first_child.set(Some(new_child)); + } + self.last_child.set(Some(new_child)); + } + + fn insert_before(&'arena self, new_sibling: &'arena Self) { + new_sibling.detach(); + new_sibling.parent.set(self.parent.get()); + new_sibling.next_sibling.set(Some(self)); + if let Some(previous_sibling) = self.previous_sibling.take() { + new_sibling.previous_sibling.set(Some(previous_sibling)); + debug_assert!(ptr::eq::<Node>( + previous_sibling.next_sibling.get().unwrap(), + self + )); + previous_sibling.next_sibling.set(Some(new_sibling)); + } else if let Some(parent) = self.parent.get() { + debug_assert!(ptr::eq::<Node>(parent.first_child.get().unwrap(), self)); + parent.first_child.set(Some(new_sibling)); + } + self.previous_sibling.set(Some(new_sibling)); + } +} + +impl<'arena> Sink<'arena> { + fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> { + self.arena.alloc(Node::new(data)) + } + + fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A) + where + P: FnOnce() -> Option<Ref<'arena>>, + A: FnOnce(Ref<'arena>), + { + let new_node = match child { + NodeOrText::AppendText(text) => { + // Append to an existing Text node if we have one. + if let Some(&Node { + data: NodeData::Text { ref contents }, + .. + }) = previous() + { + contents.borrow_mut().push_tendril(&text); + return; + } + self.new_node(NodeData::Text { + contents: RefCell::new(text), + }) + }, + NodeOrText::AppendNode(node) => node, + }; + + append(new_node) + } +} + +impl<'arena> TreeSink for Sink<'arena> { + type Handle = Ref<'arena>; + type Output = Ref<'arena>; + + fn finish(self) -> Ref<'arena> { + self.document + } + + fn parse_error(&mut self, _: Cow<'static, str>) {} + + fn get_document(&mut self) -> Ref<'arena> { + self.document + } + + fn set_quirks_mode(&mut self, mode: QuirksMode) { + self.quirks_mode = mode; + } + + fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool { + ptr::eq::<Node>(*x, *y) + } + + fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> { + match target.data { + NodeData::Element { ref name, .. } => name.expanded(), + _ => panic!("not an element!"), + } + } + + fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> { + if let NodeData::Element { + template_contents: Some(ref contents), + .. + } = target.data + { + contents + } else { + panic!("not a template element!") + } + } + + fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool { + if let NodeData::Element { + mathml_annotation_xml_integration_point, + .. + } = target.data + { + mathml_annotation_xml_integration_point + } else { + panic!("not an element!") + } + } + + fn create_element( + &mut self, + name: QualName, + attrs: Vec<Attribute>, + flags: ElementFlags, + ) -> Ref<'arena> { + self.new_node(NodeData::Element { + name, + attrs: RefCell::new(attrs), + template_contents: if flags.template { + Some(self.new_node(NodeData::Document)) + } else { + None + }, + mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point, + }) + } + + fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> { + self.new_node(NodeData::Comment { contents: text }) + } + + fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> { + self.new_node(NodeData::ProcessingInstruction { + target: target, + contents: data, + }) + } + + fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { + self.append_common( + child, + || parent.last_child.get(), + |new_node| parent.append(new_node), + ) + } + + fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { + self.append_common( + child, + || sibling.previous_sibling.get(), + |new_node| sibling.insert_before(new_node), + ) + } + + fn append_based_on_parent_node( + &mut self, + element: &Ref<'arena>, + prev_element: &Ref<'arena>, + child: NodeOrText<Ref<'arena>>, + ) { + if element.parent.get().is_some() { + self.append_before_sibling(element, child) + } else { + self.append(prev_element, child) + } + } + + fn append_doctype_to_document( + &mut self, + name: StrTendril, + public_id: StrTendril, + system_id: StrTendril, + ) { + self.document.append(self.new_node(NodeData::Doctype { + name, + public_id, + system_id, + })) + } + + fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) { + let mut existing = if let NodeData::Element { ref attrs, .. } = target.data { + attrs.borrow_mut() + } else { + panic!("not an element") + }; + + let existing_names = existing + .iter() + .map(|e| e.name.clone()) + .collect::<HashSet<_>>(); + existing.extend( + attrs + .into_iter() + .filter(|attr| !existing_names.contains(&attr.name)), + ); + } + + fn remove_from_parent(&mut self, target: &Ref<'arena>) { + target.detach() + } + + fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) { + let mut next_child = node.first_child.get(); + while let Some(child) = next_child { + debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node)); + next_child = child.next_sibling.get(); + new_parent.append(child) + } + } +} |