diff options
Diffstat (limited to 'src/serialize')
| -rw-r--r-- | src/serialize/mod.rs | 256 | 
1 files changed, 256 insertions, 0 deletions
| diff --git a/src/serialize/mod.rs b/src/serialize/mod.rs new file mode 100644 index 0000000..3a57b47 --- /dev/null +++ b/src/serialize/mod.rs @@ -0,0 +1,256 @@ +// Copyright 2014-2017 The html5ever Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use log::warn; +pub use markup5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope}; +use markup5ever::{local_name, namespace_url, ns}; +use std::default::Default; +use std::io::{self, Write}; + +use crate::{LocalName, QualName}; + +pub fn serialize<Wr, T>(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()> +where +    Wr: Write, +    T: Serialize, +{ +    let mut ser = HtmlSerializer::new(writer, opts.clone()); +    node.serialize(&mut ser, opts.traversal_scope) +} + +#[derive(Clone)] +pub struct SerializeOpts { +    /// Is scripting enabled? +    pub scripting_enabled: bool, + +    /// Serialize the root node? Default: ChildrenOnly +    pub traversal_scope: TraversalScope, + +    /// If the serializer is asked to serialize an invalid tree, the default +    /// behavior is to panic in the event that an `end_elem` is created without a +    /// matching `start_elem`. Setting this to true will prevent those panics by +    /// creating a default parent on the element stack. No extra start elem will +    /// actually be written. Default: false +    pub create_missing_parent: bool, +} + +impl Default for SerializeOpts { +    fn default() -> SerializeOpts { +        SerializeOpts { +            scripting_enabled: true, +            traversal_scope: TraversalScope::ChildrenOnly(None), +            create_missing_parent: false, +        } +    } +} + +#[derive(Default)] +struct ElemInfo { +    html_name: Option<LocalName>, +    ignore_children: bool +} + +pub struct HtmlSerializer<Wr: Write> { +    pub writer: Wr, +    opts: SerializeOpts, +    stack: Vec<ElemInfo>, +} + +fn tagname(name: &QualName) -> LocalName { +    match name.ns { +        ns!(html) | ns!(mathml) | ns!(svg) => (), +        ref ns => { +            // FIXME(#122) +            warn!("node with weird namespace {:?}", ns); +        }, +    } + +    name.local.clone() +} + +impl<Wr: Write> HtmlSerializer<Wr> { +    pub fn new(writer: Wr, opts: SerializeOpts) -> Self { +        let html_name = match opts.traversal_scope { +            TraversalScope::IncludeNode | TraversalScope::ChildrenOnly(None) => None, +            TraversalScope::ChildrenOnly(Some(ref n)) => Some(tagname(n)), +        }; +        HtmlSerializer { +            writer, +            opts, +            stack: vec![ElemInfo { +                html_name, +                ignore_children: false, +            }], +        } +    } + +    fn parent(&mut self) -> &mut ElemInfo { +        if self.stack.is_empty() { +            if self.opts.create_missing_parent { +                warn!("ElemInfo stack empty, creating new parent"); +                self.stack.push(Default::default()); +            } else { +                panic!("no parent ElemInfo") +            } +        } +        self.stack.last_mut().unwrap() +    } + +    fn write_escaped(&mut self, text: &str, attr_mode: bool) -> io::Result<()> { +        for c in text.chars() { +            match c { +                '&' => self.writer.write_all(b"&"), +                '\u{00A0}' => self.writer.write_all(b" "), +                '"' if attr_mode => self.writer.write_all(b"""), +                '<' if !attr_mode => self.writer.write_all(b"<"), +                '>' if !attr_mode => self.writer.write_all(b">"), +                c => self.writer.write_fmt(format_args!("{}", c)), +            }?; +        } +        Ok(()) +    } +} + +impl<Wr: Write> Serializer for HtmlSerializer<Wr> { +    fn start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()> +    where +        AttrIter: Iterator<Item = AttrRef<'a>>, +    { +        let html_name = match name.ns { +            ns!(html) => Some(name.local.clone()), +            _ => None, +        }; + +        if self.parent().ignore_children { +            self.stack.push(ElemInfo { +                html_name, +                ignore_children: true, +            }); +            return Ok(()); +        } + +        self.writer.write_all(b"<")?; +        self.writer.write_all(tagname(&name).as_bytes())?; +        for (name, value) in attrs { +            self.writer.write_all(b" ")?; + +            match name.ns { +                ns!() => (), +                ns!(xml) => self.writer.write_all(b"xml:")?, +                ns!(xmlns) => { +                    if name.local != local_name!("xmlns") { +                        self.writer.write_all(b"xmlns:")?; +                    } +                }, +                ns!(xlink) => self.writer.write_all(b"xlink:")?, +                ref ns => { +                    // FIXME(#122) +                    warn!("attr with weird namespace {:?}", ns); +                    self.writer.write_all(b"unknown_namespace:")?; +                }, +            } + +            self.writer.write_all(name.local.as_bytes())?; +            self.writer.write_all(b"=\"")?; +            self.write_escaped(value, true)?; +            self.writer.write_all(b"\"")?; +        } +        self.writer.write_all(b">")?; + +        let ignore_children = name.ns == ns!(html) && +            match name.local { +                local_name!("area") | +                local_name!("base") | +                local_name!("basefont") | +                local_name!("bgsound") | +                local_name!("br") | +                local_name!("col") | +                local_name!("embed") | +                local_name!("frame") | +                local_name!("hr") | +                local_name!("img") | +                local_name!("input") | +                local_name!("keygen") | +                local_name!("link") | +                local_name!("meta") | +                local_name!("param") | +                local_name!("source") | +                local_name!("track") | +                local_name!("wbr") => true, +                _ => false, +            }; + +        self.stack.push(ElemInfo { +            html_name, +            ignore_children, +        }); + +        Ok(()) +    } + +    fn end_elem(&mut self, name: QualName) -> io::Result<()> { +        let info = match self.stack.pop() { +            Some(info) => info, +            None if self.opts.create_missing_parent => { +                warn!("missing ElemInfo, creating default."); +                Default::default() +            }, +            _ => panic!("no ElemInfo"), +        }; +        if info.ignore_children { +            return Ok(()); +        } + +        self.writer.write_all(b"</")?; +        self.writer.write_all(tagname(&name).as_bytes())?; +        self.writer.write_all(b">") +    } + +    fn write_text(&mut self, text: &str) -> io::Result<()> { +        let escape = match self.parent().html_name { +            Some(local_name!("style")) | +            Some(local_name!("script")) | +            Some(local_name!("xmp")) | +            Some(local_name!("iframe")) | +            Some(local_name!("noembed")) | +            Some(local_name!("noframes")) | +            Some(local_name!("plaintext")) => false, + +            Some(local_name!("noscript")) => !self.opts.scripting_enabled, + +            _ => true, +        }; + +        if escape { +            self.write_escaped(text, false) +        } else { +            self.writer.write_all(text.as_bytes()) +        } +    } + +    fn write_comment(&mut self, text: &str) -> io::Result<()> { +        self.writer.write_all(b"<!--")?; +        self.writer.write_all(text.as_bytes())?; +        self.writer.write_all(b"-->") +    } + +    fn write_doctype(&mut self, name: &str) -> io::Result<()> { +        self.writer.write_all(b"<!DOCTYPE ")?; +        self.writer.write_all(name.as_bytes())?; +        self.writer.write_all(b">") +    } + +    fn write_processing_instruction(&mut self, target: &str, data: &str) -> io::Result<()> { +        self.writer.write_all(b"<?")?; +        self.writer.write_all(target.as_bytes())?; +        self.writer.write_all(b" ")?; +        self.writer.write_all(data.as_bytes())?; +        self.writer.write_all(b">") +    } +} | 
