aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Martin Fischer <martin@push-f.com> 2023-09-12 09:03:56 +0200
committer: Martin Fischer <martin@push-f.com> 2023-09-28 10:36:08 +0200
commit: 14bc6f2cceed0fa578d6a1195266885bf57a5d4c (patch)
tree: 50988abce274aa5e4aa5905fb4bcc5c8cc4de652 /src
parent: ad6ac5f0a825775c231e76cdc9016e61e54f4141 (diff)
chore: add BasicEmitter stub
Diffstat (limited to 'src')
-rw-r--r-- src/basic_emitter.rs | 126
-rw-r--r-- src/lib.rs | 2
-rw-r--r-- src/naive_parser.rs | 13
3 files changed, 134 insertions, 7 deletions
diff --git a/src/basic_emitter.rs b/src/basic_emitter.rs
new file mode 100644
index 0000000..046b645
--- /dev/null
+++ b/src/basic_emitter.rs
@@ -0,0 +1,126 @@
+use std::collections::VecDeque;
+use std::ops::Range;
+
+use crate::offset::Offset;
+use crate::Emitter;
+use crate::Error;
+use crate::Token;
+
+/// An [`Emitter`] implementation that yields [`Token`].
+pub struct BasicEmitter<O> {
+ errors: VecDeque<(Error, Range<O>)>,
+}
+
+impl<O: Default> Default for BasicEmitter<O> {
+ fn default() -> Self {
+ BasicEmitter {
+ errors: VecDeque::new(),
+ }
+ }
+}
+
+impl<O> BasicEmitter<O> {
+ /// Removes all encountered tokenizer errors and returns them as an iterator.
+ pub fn drain_errors(&mut self) -> impl Iterator<Item = (Error, Range<O>)> + '_ {
+ self.errors.drain(0..)
+ }
+}
+
+impl<O> Iterator for BasicEmitter<O> {
+ type Item = Token<O>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ todo!()
+ }
+}
+
+#[allow(unused_variables)]
+impl<O: Offset> Emitter<O> for BasicEmitter<O> {
+ fn report_error(&mut self, error: Error, span: Range<O>) {
+ self.errors.push_back((error, span)); // queued in arrival order until `drain_errors` is called
+ }
+
+ fn emit_char(&mut self, c: char) {
+ todo!()
+ }
+
+ fn emit_eof(&mut self) {
+ todo!()
+ }
+
+ fn init_start_tag(&mut self, tag_offset: O, name_offset: O) {
+ todo!()
+ }
+
+ fn init_end_tag(&mut self, tag_offset: O, name_offset: O) {
+ todo!()
+ }
+
+ fn push_tag_name(&mut self, s: &str) {
+ todo!()
+ }
+
+ fn init_attribute_name(&mut self, offset: O) {
+ todo!()
+ }
+
+ fn push_attribute_name(&mut self, s: &str) {
+ todo!()
+ }
+
+ fn push_attribute_value(&mut self, s: &str) {
+ todo!()
+ }
+
+ fn set_self_closing(&mut self, slash_span: Range<O>) {
+ todo!()
+ }
+
+ fn emit_current_tag(&mut self, offset: O) {
+ todo!()
+ }
+
+ fn init_comment(&mut self, data_start_offset: O) {
+ todo!()
+ }
+
+ fn push_comment(&mut self, s: &str) {
+ todo!()
+ }
+
+ fn emit_current_comment(&mut self, data_end_offset: O) {
+ todo!()
+ }
+
+ fn init_doctype(&mut self, offset: O) {
+ todo!()
+ }
+
+ fn push_doctype_name(&mut self, s: &str) {
+ todo!()
+ }
+
+ fn init_doctype_public_id(&mut self, offset: O) {
+ todo!()
+ }
+
+ fn push_doctype_public_id(&mut self, s: &str) {
+ todo!()
+ }
+
+ fn init_doctype_system_id(&mut self, offset: O) {
+ todo!()
+ }
+
+ fn push_doctype_system_id(&mut self, s: &str) {
+ todo!()
+ }
+
+ fn set_force_quirks(&mut self) {
+ todo!()
+ }
+
+ fn emit_current_doctype(&mut self, offset: O) {
+ todo!()
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
index aecbef3..16728ad 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,6 +7,7 @@
#![doc = concat!("[the LICENSE file]: ", file_url!("LICENSE"))]
#![doc = include_str!("../README.md")]
+mod basic_emitter;
mod emitter;
mod entities;
mod error;
@@ -25,6 +26,7 @@ pub mod offset;
pub mod reader;
pub mod token;
+pub use basic_emitter::BasicEmitter;
pub use emitter::Emitter;
pub use error::Error;
pub use naive_parser::NaiveParser;
diff --git a/src/naive_parser.rs b/src/naive_parser.rs
index 91edbc0..4f8dc0d 100644
--- a/src/naive_parser.rs
+++ b/src/naive_parser.rs
@@ -1,8 +1,7 @@
use crate::offset::{Offset, Position};
use crate::reader::{IntoReader, Reader};
use crate::tokenizer::CdataAction;
-use crate::tracing_emitter::TracingEmitter;
-use crate::{Emitter, Event, State, Tokenizer};
+use crate::{BasicEmitter, Emitter, Event, State, Tokenizer};
/// A naive HTML parser (**not** spec-compliant since it doesn't do tree construction).
///
@@ -13,7 +12,7 @@ use crate::{Emitter, Event, State, Tokenizer};
///
/// * it naively emits any CDATA sections as bogus comments, for example:
///
-/// ```
+/// ```no_run TODO: run again once BasicEmitter has been implemented
/// # use html5tokenizer::{NaiveParser, Token};
/// let html = "<svg><![CDATA[I love SVG]]>";
/// let mut tokens = NaiveParser::new(html).flatten();
@@ -30,18 +29,18 @@ pub struct NaiveParser<R: Reader, O: Offset, E: Emitter<O>> {
tokenizer: Tokenizer<R, O, E>,
}
-impl<R, O> NaiveParser<R, O, TracingEmitter<O>>
+impl<R, O> NaiveParser<R, O, BasicEmitter<O>>
where
R: Reader + Position<O>,
O: Offset,
{
- /// Constructs a new naive parser.
+ /// Constructs a new naive parser using the [`BasicEmitter`].
// TODO: add example for NaiveParser::new
- pub fn new<'a, IR>(reader: IR) -> NaiveParser<R, O, TracingEmitter<O>>
+ pub fn new<'a, IR>(reader: IR) -> NaiveParser<R, O, BasicEmitter<O>>
where
IR: IntoReader<'a, Reader = R>,
{
- NaiveParser::new_with_emitter(reader, TracingEmitter::default())
+ NaiveParser::new_with_emitter(reader, BasicEmitter::default())
}
}