| author | Martin Fischer <martin@push-f.com> | 2021-04-08 08:58:38 +0200 |
|---|---|---|
| committer | Martin Fischer <martin@push-f.com> | 2021-04-08 15:40:48 +0200 |
| commit | 2ade35bf1030e4eb4ccf7c02ceba04124669743c (patch) | |
| tree | f6c17f8fbe39be1e79271a0ca08fbfb07c0f2a03 /src/driver.rs | |
| parent | 4231388b1fd99b7dbda4dd5885d858aa251513d5 (diff) | |
delete tree_builder, driver and serialize
Diffstat (limited to 'src/driver.rs')
-rw-r--r-- | src/driver.rs | 137 |
1 file changed, 0 insertions, 137 deletions
diff --git a/src/driver.rs b/src/driver.rs
deleted file mode 100644
index 26db9b8..0000000
--- a/src/driver.rs
+++ /dev/null
@@ -1,137 +0,0 @@
-// Copyright 2014-2017 The html5ever Project Developers. See the
-// COPYRIGHT file at the top-level directory of this distribution.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-//! High-level interface to the parser.
-
-use crate::buffer_queue::BufferQueue;
-use crate::tokenizer::{Tokenizer, TokenizerOpts, TokenizerResult};
-use crate::tree_builder::{create_element, TreeBuilder, TreeBuilderOpts, TreeSink};
-use crate::{Attribute, QualName};
-
-use std::borrow::Cow;
-
-use crate::tendril;
-use crate::tendril::stream::{TendrilSink, Utf8LossyDecoder};
-use crate::tendril::StrTendril;
-
-/// All-encompassing options struct for the parser.
-#[derive(Clone, Default)]
-pub struct ParseOpts {
-    /// Tokenizer options.
-    pub tokenizer: TokenizerOpts,
-
-    /// Tree builder options.
-    pub tree_builder: TreeBuilderOpts,
-}
-
-/// Parse an HTML document
-///
-/// The returned value implements `tendril::TendrilSink`
-/// so that Unicode input may be provided incrementally,
-/// or all at once with the `one` method.
-///
-/// If your input is bytes, use `Parser::from_utf8`.
-pub fn parse_document<Sink>(sink: Sink, opts: ParseOpts) -> Parser<Sink>
-where
-    Sink: TreeSink,
-{
-    let tb = TreeBuilder::new(sink, opts.tree_builder);
-    let tok = Tokenizer::new(tb, opts.tokenizer);
-    Parser {
-        tokenizer: tok,
-        input_buffer: BufferQueue::new(),
-    }
-}
-
-/// Parse an HTML fragment
-///
-/// The returned value implements `tendril::TendrilSink`
-/// so that Unicode input may be provided incrementally,
-/// or all at once with the `one` method.
-///
-/// If your input is bytes, use `Parser::from_utf8`.
-pub fn parse_fragment<Sink>(
-    mut sink: Sink,
-    opts: ParseOpts,
-    context_name: QualName,
-    context_attrs: Vec<Attribute>,
-) -> Parser<Sink>
-where
-    Sink: TreeSink,
-{
-    let context_elem = create_element(&mut sink, context_name, context_attrs);
-    parse_fragment_for_element(sink, opts, context_elem, None)
-}
-
-/// Like `parse_fragment`, but with an existing context element
-/// and optionally a form element.
-pub fn parse_fragment_for_element<Sink>(
-    sink: Sink,
-    opts: ParseOpts,
-    context_element: Sink::Handle,
-    form_element: Option<Sink::Handle>,
-) -> Parser<Sink>
-where
-    Sink: TreeSink,
-{
-    let tb = TreeBuilder::new_for_fragment(sink, context_element, form_element, opts.tree_builder);
-    let tok_opts = TokenizerOpts {
-        initial_state: Some(tb.tokenizer_state_for_context_elem()),
-        ..opts.tokenizer
-    };
-    let tok = Tokenizer::new(tb, tok_opts);
-    Parser {
-        tokenizer: tok,
-        input_buffer: BufferQueue::new(),
-    }
-}
-
-/// An HTML parser,
-/// ready to receive Unicode input through the `tendril::TendrilSink` trait’s methods.
-pub struct Parser<Sink>
-where
-    Sink: TreeSink,
-{
-    pub tokenizer: Tokenizer<TreeBuilder<Sink::Handle, Sink>>,
-    pub input_buffer: BufferQueue,
-}
-
-impl<Sink: TreeSink> TendrilSink<tendril::fmt::UTF8> for Parser<Sink> {
-    fn process(&mut self, t: StrTendril) {
-        self.input_buffer.push_back(t);
-        // FIXME: Properly support </script> somehow.
-        while let TokenizerResult::Script(_) = self.tokenizer.feed(&mut self.input_buffer) {}
-    }
-
-    // FIXME: Is it too noisy to report every character decoding error?
-    fn error(&mut self, desc: Cow<'static, str>) {
-        self.tokenizer.sink.sink.parse_error(desc)
-    }
-
-    type Output = Sink::Output;
-
-    fn finish(mut self) -> Self::Output {
-        // FIXME: Properly support </script> somehow.
-        while let TokenizerResult::Script(_) = self.tokenizer.feed(&mut self.input_buffer) {}
-        assert!(self.input_buffer.is_empty());
-        self.tokenizer.end();
-        self.tokenizer.sink.sink.finish()
-    }
-}
-
-impl<Sink: TreeSink> Parser<Sink> {
-    /// Wrap this parser into a `TendrilSink` that accepts UTF-8 bytes.
-    ///
-    /// Use this when your input is bytes that are known to be in the UTF-8 encoding.
-    /// Decoding is lossy, like `String::from_utf8_lossy`.
-    #[allow(clippy::wrong_self_convention)]
-    pub fn from_utf8(self) -> Utf8LossyDecoder<Self> {
-        Utf8LossyDecoder::new(self)
-    }
-}
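This commit removes the high-level driver entirely, leaving only the tokenizer in this fork. For reference, the `parse_document`/`Parser` API deleted above was typically used as in the following sketch. This is based on upstream html5ever, not on this repository after the commit; the `RcDom` sink from the separate `markup5ever_rcdom` crate is assumed here purely for illustration.

```rust
// Sketch of how the removed driver API is used upstream (html5ever).
// Assumes the markup5ever_rcdom crate as the TreeSink implementation.
use html5ever::driver::{parse_document, ParseOpts};
use html5ever::tendril::TendrilSink;
use markup5ever_rcdom::RcDom;

fn main() {
    let html = "<!DOCTYPE html><title>hi</title><p>Hello, world!";

    // `parse_document` returns a `Parser`, which implements `TendrilSink`,
    // so input can be fed incrementally with `process` or all at once.
    // `from_utf8` wraps it so it accepts raw UTF-8 bytes (lossy decoding).
    let dom = parse_document(RcDom::default(), ParseOpts::default())
        .from_utf8()
        .read_from(&mut html.as_bytes())
        .unwrap();

    // `dom.document` is the root handle of the resulting tree.
    println!(
        "children of #document: {}",
        dom.document.children.borrow().len()
    );
}
```

The same pattern works with `parse_fragment`, which additionally takes the qualified name and attributes of the context element for the HTML fragment parsing algorithm.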