diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/default_emitter.rs | 427 | ||||
| -rw-r--r-- | src/emitter.rs | 424 | ||||
| -rw-r--r-- | src/lib.rs | 4 | ||||
| -rw-r--r-- | src/naive_parser.rs | 2 | 
4 files changed, 432 insertions, 425 deletions
| diff --git a/src/default_emitter.rs b/src/default_emitter.rs new file mode 100644 index 0000000..c957b20 --- /dev/null +++ b/src/default_emitter.rs @@ -0,0 +1,427 @@ +use std::collections::btree_map::Entry; +use std::collections::BTreeSet; +use std::collections::VecDeque; +use std::mem; +use std::ops::Range; + +use crate::offset::NoopOffset; +use crate::offset::Offset; +use crate::token::{AttrValueSyntax, Comment, Doctype, EndTag, StartTag, Token}; +use crate::Emitter; +use crate::Error; + +/// The default implementation of [`Emitter`], used to produce tokens. +/// +/// # Warning +/// +/// * Using the DefaultEmitter without calling [`Tokenizer::set_state`] +///   results in wrong state transitions: +/// +///   ``` +///   # use html5tokenizer::{DefaultEmitter, Event, Tokenizer, Token}; +///   let emitter = DefaultEmitter::default(); +///   let html = "<script><b>"; +///   let mut tokens = Tokenizer::new(html, emitter).flatten(); +///   assert!(matches!(tokens.next().unwrap(), Event::Token(Token::StartTag(tag)) if tag.name == "script")); +///   assert!(matches!(tokens.next().unwrap(), Event::Token(Token::StartTag(tag)) if tag.name == "b")); +///   ``` +/// +/// [`Tokenizer::set_state`]: crate::Tokenizer::set_state +pub struct DefaultEmitter<O = NoopOffset> { +    current_characters: String, +    current_token: Option<Token<O>>, +    current_attribute: Option<(String, crate::token::AttrInternal<O>)>, +    seen_attributes: BTreeSet<String>, +    emitted_tokens: VecDeque<Token<O>>, +    attr_in_end_tag_span: Option<Range<O>>, +} + +impl<O> Default for DefaultEmitter<O> { +    fn default() -> Self { +        DefaultEmitter { +            current_characters: String::new(), +            current_token: None, +            current_attribute: None, +            seen_attributes: BTreeSet::new(), +            emitted_tokens: VecDeque::new(), +            attr_in_end_tag_span: None, +        } +    } +} + +impl<O: Offset> Emitter<O> for DefaultEmitter<O> { +    type Token = Token<O>; + +    fn emit_eof(&mut self) { +        self.flush_current_characters(); +    } + +    fn emit_error(&mut self, error: Error, span: Range<O>) { +        self.push_error(error, span); +    } + +    fn pop_token(&mut self) -> Option<Self::Token> { +        self.emitted_tokens.pop_back() +    } + +    fn emit_string(&mut self, s: &str) { +        self.current_characters.push_str(s); +    } + +    fn init_start_tag(&mut self, tag_offset: O, name_offset: O) { +        self.current_token = Some(Token::StartTag(StartTag { +            span: tag_offset..O::default(), +            self_closing: false, +            name: String::new(), +            attributes: Default::default(), +            name_span: name_offset..O::default(), +        })); +    } +    fn init_end_tag(&mut self, tag_offset: O, name_offset: O) { +        self.current_token = Some(Token::EndTag(EndTag { +            span: tag_offset..O::default(), +            name: String::new(), +            name_span: name_offset..O::default(), +        })); +        self.seen_attributes.clear(); +    } + +    fn init_comment(&mut self, data_start_offset: O) { +        self.current_token = Some(Token::Comment(Comment { +            data: String::new(), +            data_span: data_start_offset..O::default(), +        })); +    } +    fn emit_current_tag(&mut self, offset: O) { +        self.flush_current_attribute(); +        let mut token = self.current_token.take().unwrap(); +        match &mut token { +            Token::EndTag(tag) => { +                if !self.seen_attributes.is_empty() { +                    let span = self.attr_in_end_tag_span.take().unwrap(); +                    self.push_error(Error::EndTagWithAttributes, span); +                } +                self.seen_attributes.clear(); +                tag.span.end = offset; +            } +            Token::StartTag(tag) => { +                tag.span.end = offset; +            } +            _ => debug_assert!(false), +        } +        self.emit_token(token); +    } +    fn emit_current_comment(&mut self, data_end_offset: O) { +        let mut token = self.current_token.take().unwrap(); +        if let Token::Comment(comment) = &mut token { +            comment.data_span.end = data_end_offset; +        } else { +            debug_assert!(false); +        } +        self.emit_token(token); +    } + +    fn emit_current_doctype(&mut self, offset: O) { +        let Some(Token::Doctype(mut doctype)) = self.current_token.take() else { +            debug_assert!(false); +            return; +        }; +        doctype.span.end = offset; +        self.emit_token(Token::Doctype(doctype)); +    } + +    fn set_self_closing(&mut self, slash_span: Range<O>) { +        let tag = self.current_token.as_mut().unwrap(); +        match tag { +            Token::StartTag(StartTag { +                ref mut self_closing, +                .. +            }) => { +                *self_closing = true; +            } +            Token::EndTag(_) => { +                self.emit_error(Error::EndTagWithTrailingSolidus, slash_span); +            } +            _ => { +                debug_assert!(false); +            } +        } +    } +    fn set_force_quirks(&mut self) { +        match self.current_token { +            Some(Token::Doctype(ref mut doctype)) => doctype.force_quirks = true, +            _ => debug_assert!(false), +        } +    } +    fn push_tag_name(&mut self, s: &str) { +        match self.current_token { +            Some(Token::StartTag(StartTag { ref mut name, .. })) => { +                name.push_str(s); +            } +            Some(Token::EndTag(EndTag { ref mut name, .. })) => { +                name.push_str(s); +            } +            _ => debug_assert!(false), +        } +    } + +    fn terminate_tag_name(&mut self, offset: O) { +        match self.current_token { +            Some(Token::StartTag(StartTag { +                ref mut name_span, .. +            })) => { +                name_span.end = offset; +            } +            Some(Token::EndTag(EndTag { +                ref mut name_span, .. +            })) => { +                name_span.end = offset; +            } +            _ => debug_assert!(false), +        } +    } + +    fn push_comment(&mut self, s: &str) { +        match self.current_token { +            Some(Token::Comment(Comment { ref mut data, .. })) => data.push_str(s), +            _ => debug_assert!(false), +        } +    } + +    fn init_doctype_name(&mut self, offset: O) { +        let Some(Token::Doctype(doctype)) = &mut self.current_token else { +            debug_assert!(false); +            return; +        }; +        doctype.name = Some("".into()); +        doctype.name_span.start = offset; +    } + +    fn push_doctype_name(&mut self, s: &str) { +        match self.current_token { +            Some(Token::Doctype(Doctype { +                name: Some(ref mut name), +                .. +            })) => name.push_str(s), +            _ => debug_assert!(false), +        } +    } + +    fn terminate_doctype_name(&mut self, offset: O) { +        let Some(Token::Doctype(doctype)) = &mut self.current_token else { +            debug_assert!(false); +            return; +        }; +        doctype.name_span.end = offset; +    } + +    fn init_doctype(&mut self, offset: O) { +        self.current_token = Some(Token::Doctype(Doctype { +            name: None, +            force_quirks: false, +            public_id: None, +            system_id: None, +            span: offset..O::default(), +            name_span: O::default()..O::default(), +            public_id_span: O::default()..O::default(), +            system_id_span: O::default()..O::default(), +        })); +    } + +    fn init_attribute_name(&mut self, offset: O) { +        self.flush_current_attribute(); +        self.current_attribute = Some(( +            String::new(), +            crate::token::AttrInternal { +                name_span: offset..O::default(), +                value: String::new(), +                value_span: O::default()..O::default(), +                value_syntax: None, +            }, +        )); +    } +    fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) { +        let (_, current_attribute) = self.current_attribute.as_mut().unwrap(); +        current_attribute.value_span.start = offset; +        current_attribute.value_syntax = Some(syntax); +    } + +    fn push_attribute_name(&mut self, s: &str) { +        let current_attr = self.current_attribute.as_mut().unwrap(); +        current_attr.0.push_str(s); +    } + +    fn terminate_attribute_name(&mut self, offset: O) { +        let current_attr = self.current_attribute.as_mut().unwrap(); +        current_attr.1.name_span.end = offset; +    } + +    fn push_attribute_value(&mut self, s: &str) { +        let current_attr = self.current_attribute.as_mut().unwrap(); +        current_attr.1.value.push_str(s); +    } + +    fn terminate_attribute_value(&mut self, offset: O) { +        let current_attr = self.current_attribute.as_mut().unwrap(); +        current_attr.1.value_span.end = offset; +    } + +    fn init_doctype_public_id(&mut self, offset: O) { +        let Some(Token::Doctype(doctype)) = &mut self.current_token else { +            debug_assert!(false); +            return; +        }; +        doctype.public_id = Some("".to_owned()); +        doctype.public_id_span.start = offset; +    } +    fn init_doctype_system_id(&mut self, offset: O) { +        let Some(Token::Doctype(doctype)) = &mut self.current_token else { +            debug_assert!(false); +            return; +        }; +        doctype.system_id = Some("".to_owned()); +        doctype.system_id_span.start = offset; +    } +    fn push_doctype_public_id(&mut self, s: &str) { +        if let Some(Token::Doctype(Doctype { +            public_id: Some(ref mut id), +            .. +        })) = self.current_token +        { +            id.push_str(s); +        } else { +            debug_assert!(false); +        } +    } + +    fn terminate_doctype_public_id(&mut self, offset: O) { +        if let Some(Token::Doctype(Doctype { +            ref mut public_id_span, +            .. +        })) = self.current_token +        { +            public_id_span.end = offset; +        } else { +            debug_assert!(false); +        } +    } + +    fn push_doctype_system_id(&mut self, s: &str) { +        if let Some(Token::Doctype(Doctype { +            system_id: Some(ref mut id), +            .. +        })) = self.current_token +        { +            id.push_str(s); +        } else { +            debug_assert!(false); +        } +    } + +    fn terminate_doctype_system_id(&mut self, offset: O) { +        if let Some(Token::Doctype(Doctype { +            ref mut system_id_span, +            .. +        })) = self.current_token +        { +            system_id_span.end = offset; +        } else { +            debug_assert!(false); +        } +    } +} + +impl<O> DefaultEmitter<O> { +    fn emit_token(&mut self, token: Token<O>) { +        self.flush_current_characters(); +        self.emitted_tokens.push_front(token); +    } + +    fn flush_current_attribute(&mut self) +    where +        O: Offset, +    { +        if let Some((name, map_val)) = self.current_attribute.take() { +            match self.current_token { +                Some(Token::StartTag(ref mut tag)) => match tag.attributes.inner.entry(name) { +                    Entry::Vacant(vacant) => { +                        vacant.insert(map_val); +                    } +                    Entry::Occupied(_) => { +                        self.push_error(Error::DuplicateAttribute, map_val.name_span); +                    } +                }, +                Some(Token::EndTag(_)) => { +                    self.attr_in_end_tag_span = Some(map_val.name_span.clone()); +                    if !self.seen_attributes.insert(name) { +                        self.push_error(Error::DuplicateAttribute, map_val.name_span); +                    } +                } +                _ => { +                    debug_assert!(false); +                } +            } +        } +    } + +    fn flush_current_characters(&mut self) { +        if self.current_characters.is_empty() { +            return; +        } + +        let s = mem::take(&mut self.current_characters); +        self.emit_token(Token::String(s)); +    } + +    fn push_error(&mut self, error: Error, span: Range<O>) { +        // bypass character flushing in self.emit_token: we don't need the error location to be +        // that exact +        self.emitted_tokens.push_front(Token::Error { error, span }); +    } +} + +/// The majority of our testing of the [`DefaultEmitter`] is done against the +/// html5lib-tests in the html5lib integration test. This module only tests +/// details that aren't present in the html5lib test data. +#[cfg(test)] +mod tests { +    use super::DefaultEmitter; +    use crate::token::{AttrValueSyntax, Token}; +    use crate::{Event, Tokenizer}; + +    #[test] +    fn test_attribute_value_syntax() { +        let mut tokenizer = Tokenizer::new( +            "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">", +            DefaultEmitter::default(), +        ) +        .flatten(); +        let Event::Token(Token::StartTag(start_tag)) = tokenizer.next().unwrap() else { +            panic!("expected start tag"); +        }; +        assert_eq!( +            start_tag.attributes.get("empty").unwrap().value_syntax(), +            None +        ); +        assert_eq!( +            start_tag.attributes.get("unquoted").unwrap().value_syntax(), +            Some(AttrValueSyntax::Unquoted) +        ); +        assert_eq!( +            start_tag +                .attributes +                .get("single-quoted") +                .unwrap() +                .value_syntax(), +            Some(AttrValueSyntax::SingleQuoted) +        ); +        assert_eq!( +            start_tag +                .attributes +                .get("double-quoted") +                .unwrap() +                .value_syntax(), +            Some(AttrValueSyntax::DoubleQuoted) +        ); +    } +} diff --git a/src/emitter.rs b/src/emitter.rs index 23f9ede..311c73c 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -1,12 +1,6 @@ -use std::collections::btree_map::Entry; -use std::collections::BTreeSet; -use std::collections::VecDeque; -use std::mem;  use std::ops::Range; -use crate::offset::NoopOffset; -use crate::offset::Offset; -use crate::token::{AttrValueSyntax, Comment, Doctype, EndTag, StartTag, Token}; +use crate::token::AttrValueSyntax;  use crate::Error;  /// An emitter is an object providing methods to the tokenizer to produce ("emit") tokens. @@ -193,419 +187,3 @@ pub trait Emitter<O> {      /// [`push_doctype_system_id`]: Self::push_doctype_system_id      fn terminate_doctype_system_id(&mut self, offset: O) {}  } - -/// The default implementation of [`Emitter`], used to produce tokens. -/// -/// # Warning -/// -/// * Using the DefaultEmitter without calling [`Tokenizer::set_state`] -///   results in wrong state transitions: -/// -///   ``` -///   # use html5tokenizer::{DefaultEmitter, Event, Tokenizer, Token}; -///   let emitter = DefaultEmitter::default(); -///   let html = "<script><b>"; -///   let mut tokens = Tokenizer::new(html, emitter).flatten(); -///   assert!(matches!(tokens.next().unwrap(), Event::Token(Token::StartTag(tag)) if tag.name == "script")); -///   assert!(matches!(tokens.next().unwrap(), Event::Token(Token::StartTag(tag)) if tag.name == "b")); -///   ``` -/// -/// [`Tokenizer::set_state`]: crate::Tokenizer::set_state -pub struct DefaultEmitter<O = NoopOffset> { -    current_characters: String, -    current_token: Option<Token<O>>, -    current_attribute: Option<(String, crate::token::AttrInternal<O>)>, -    seen_attributes: BTreeSet<String>, -    emitted_tokens: VecDeque<Token<O>>, -    attr_in_end_tag_span: Option<Range<O>>, -} - -impl<O> Default for DefaultEmitter<O> { -    fn default() -> Self { -        DefaultEmitter { -            current_characters: String::new(), -            current_token: None, -            current_attribute: None, -            seen_attributes: BTreeSet::new(), -            emitted_tokens: VecDeque::new(), -            attr_in_end_tag_span: None, -        } -    } -} - -impl<O> DefaultEmitter<O> { -    fn emit_token(&mut self, token: Token<O>) { -        self.flush_current_characters(); -        self.emitted_tokens.push_front(token); -    } - -    fn flush_current_attribute(&mut self) -    where -        O: Offset, -    { -        if let Some((name, map_val)) = self.current_attribute.take() { -            match self.current_token { -                Some(Token::StartTag(ref mut tag)) => match tag.attributes.inner.entry(name) { -                    Entry::Vacant(vacant) => { -                        vacant.insert(map_val); -                    } -                    Entry::Occupied(_) => { -                        self.push_error(Error::DuplicateAttribute, map_val.name_span); -                    } -                }, -                Some(Token::EndTag(_)) => { -                    self.attr_in_end_tag_span = Some(map_val.name_span.clone()); -                    if !self.seen_attributes.insert(name) { -                        self.push_error(Error::DuplicateAttribute, map_val.name_span); -                    } -                } -                _ => { -                    debug_assert!(false); -                } -            } -        } -    } - -    fn flush_current_characters(&mut self) { -        if self.current_characters.is_empty() { -            return; -        } - -        let s = mem::take(&mut self.current_characters); -        self.emit_token(Token::String(s)); -    } - -    fn push_error(&mut self, error: Error, span: Range<O>) { -        // bypass character flushing in self.emit_token: we don't need the error location to be -        // that exact -        self.emitted_tokens.push_front(Token::Error { error, span }); -    } -} - -impl<O: Offset> Emitter<O> for DefaultEmitter<O> { -    type Token = Token<O>; - -    fn emit_eof(&mut self) { -        self.flush_current_characters(); -    } - -    fn emit_error(&mut self, error: Error, span: Range<O>) { -        self.push_error(error, span); -    } - -    fn pop_token(&mut self) -> Option<Self::Token> { -        self.emitted_tokens.pop_back() -    } - -    fn emit_string(&mut self, s: &str) { -        self.current_characters.push_str(s); -    } - -    fn init_start_tag(&mut self, tag_offset: O, name_offset: O) { -        self.current_token = Some(Token::StartTag(StartTag { -            span: tag_offset..O::default(), -            self_closing: false, -            name: String::new(), -            attributes: Default::default(), -            name_span: name_offset..O::default(), -        })); -    } -    fn init_end_tag(&mut self, tag_offset: O, name_offset: O) { -        self.current_token = Some(Token::EndTag(EndTag { -            span: tag_offset..O::default(), -            name: String::new(), -            name_span: name_offset..O::default(), -        })); -        self.seen_attributes.clear(); -    } - -    fn init_comment(&mut self, data_start_offset: O) { -        self.current_token = Some(Token::Comment(Comment { -            data: String::new(), -            data_span: data_start_offset..O::default(), -        })); -    } -    fn emit_current_tag(&mut self, offset: O) { -        self.flush_current_attribute(); -        let mut token = self.current_token.take().unwrap(); -        match &mut token { -            Token::EndTag(tag) => { -                if !self.seen_attributes.is_empty() { -                    let span = self.attr_in_end_tag_span.take().unwrap(); -                    self.push_error(Error::EndTagWithAttributes, span); -                } -                self.seen_attributes.clear(); -                tag.span.end = offset; -            } -            Token::StartTag(tag) => { -                tag.span.end = offset; -            } -            _ => debug_assert!(false), -        } -        self.emit_token(token); -    } -    fn emit_current_comment(&mut self, data_end_offset: O) { -        let mut token = self.current_token.take().unwrap(); -        if let Token::Comment(comment) = &mut token { -            comment.data_span.end = data_end_offset; -        } else { -            debug_assert!(false); -        } -        self.emit_token(token); -    } - -    fn emit_current_doctype(&mut self, offset: O) { -        let Some(Token::Doctype(mut doctype)) = self.current_token.take() else { -            debug_assert!(false); -            return; -        }; -        doctype.span.end = offset; -        self.emit_token(Token::Doctype(doctype)); -    } - -    fn set_self_closing(&mut self, slash_span: Range<O>) { -        let tag = self.current_token.as_mut().unwrap(); -        match tag { -            Token::StartTag(StartTag { -                ref mut self_closing, -                .. -            }) => { -                *self_closing = true; -            } -            Token::EndTag(_) => { -                self.emit_error(Error::EndTagWithTrailingSolidus, slash_span); -            } -            _ => { -                debug_assert!(false); -            } -        } -    } -    fn set_force_quirks(&mut self) { -        match self.current_token { -            Some(Token::Doctype(ref mut doctype)) => doctype.force_quirks = true, -            _ => debug_assert!(false), -        } -    } -    fn push_tag_name(&mut self, s: &str) { -        match self.current_token { -            Some(Token::StartTag(StartTag { ref mut name, .. })) => { -                name.push_str(s); -            } -            Some(Token::EndTag(EndTag { ref mut name, .. })) => { -                name.push_str(s); -            } -            _ => debug_assert!(false), -        } -    } - -    fn terminate_tag_name(&mut self, offset: O) { -        match self.current_token { -            Some(Token::StartTag(StartTag { -                ref mut name_span, .. -            })) => { -                name_span.end = offset; -            } -            Some(Token::EndTag(EndTag { -                ref mut name_span, .. -            })) => { -                name_span.end = offset; -            } -            _ => debug_assert!(false), -        } -    } - -    fn push_comment(&mut self, s: &str) { -        match self.current_token { -            Some(Token::Comment(Comment { ref mut data, .. })) => data.push_str(s), -            _ => debug_assert!(false), -        } -    } - -    fn init_doctype_name(&mut self, offset: O) { -        let Some(Token::Doctype(doctype)) = &mut self.current_token else { -            debug_assert!(false); -            return; -        }; -        doctype.name = Some("".into()); -        doctype.name_span.start = offset; -    } - -    fn push_doctype_name(&mut self, s: &str) { -        match self.current_token { -            Some(Token::Doctype(Doctype { -                name: Some(ref mut name), -                .. -            })) => name.push_str(s), -            _ => debug_assert!(false), -        } -    } - -    fn terminate_doctype_name(&mut self, offset: O) { -        let Some(Token::Doctype(doctype)) = &mut self.current_token else { -            debug_assert!(false); -            return; -        }; -        doctype.name_span.end = offset; -    } - -    fn init_doctype(&mut self, offset: O) { -        self.current_token = Some(Token::Doctype(Doctype { -            name: None, -            force_quirks: false, -            public_id: None, -            system_id: None, -            span: offset..O::default(), -            name_span: O::default()..O::default(), -            public_id_span: O::default()..O::default(), -            system_id_span: O::default()..O::default(), -        })); -    } - -    fn init_attribute_name(&mut self, offset: O) { -        self.flush_current_attribute(); -        self.current_attribute = Some(( -            String::new(), -            crate::token::AttrInternal { -                name_span: offset..O::default(), -                value: String::new(), -                value_span: O::default()..O::default(), -                value_syntax: None, -            }, -        )); -    } -    fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) { -        let (_, current_attribute) = self.current_attribute.as_mut().unwrap(); -        current_attribute.value_span.start = offset; -        current_attribute.value_syntax = Some(syntax); -    } - -    fn push_attribute_name(&mut self, s: &str) { -        let current_attr = self.current_attribute.as_mut().unwrap(); -        current_attr.0.push_str(s); -    } - -    fn terminate_attribute_name(&mut self, offset: O) { -        let current_attr = self.current_attribute.as_mut().unwrap(); -        current_attr.1.name_span.end = offset; -    } - -    fn push_attribute_value(&mut self, s: &str) { -        let current_attr = self.current_attribute.as_mut().unwrap(); -        current_attr.1.value.push_str(s); -    } - -    fn terminate_attribute_value(&mut self, offset: O) { -        let current_attr = self.current_attribute.as_mut().unwrap(); -        current_attr.1.value_span.end = offset; -    } - -    fn init_doctype_public_id(&mut self, offset: O) { -        let Some(Token::Doctype(doctype)) = &mut self.current_token else { -            debug_assert!(false); -            return; -        }; -        doctype.public_id = Some("".to_owned()); -        doctype.public_id_span.start = offset; -    } -    fn init_doctype_system_id(&mut self, offset: O) { -        let Some(Token::Doctype(doctype)) = &mut self.current_token else { -            debug_assert!(false); -            return; -        }; -        doctype.system_id = Some("".to_owned()); -        doctype.system_id_span.start = offset; -    } -    fn push_doctype_public_id(&mut self, s: &str) { -        if let Some(Token::Doctype(Doctype { -            public_id: Some(ref mut id), -            .. -        })) = self.current_token -        { -            id.push_str(s); -        } else { -            debug_assert!(false); -        } -    } - -    fn terminate_doctype_public_id(&mut self, offset: O) { -        if let Some(Token::Doctype(Doctype { -            ref mut public_id_span, -            .. -        })) = self.current_token -        { -            public_id_span.end = offset; -        } else { -            debug_assert!(false); -        } -    } - -    fn push_doctype_system_id(&mut self, s: &str) { -        if let Some(Token::Doctype(Doctype { -            system_id: Some(ref mut id), -            .. -        })) = self.current_token -        { -            id.push_str(s); -        } else { -            debug_assert!(false); -        } -    } - -    fn terminate_doctype_system_id(&mut self, offset: O) { -        if let Some(Token::Doctype(Doctype { -            ref mut system_id_span, -            .. -        })) = self.current_token -        { -            system_id_span.end = offset; -        } else { -            debug_assert!(false); -        } -    } -} - -/// The majority of our testing of the [`DefaultEmitter`] is done against the -/// html5lib-tests in the html5lib integration test. This module only tests -/// details that aren't present in the html5lib test data. -#[cfg(test)] -mod tests { -    use super::DefaultEmitter; -    use crate::token::{AttrValueSyntax, Token}; -    use crate::{Event, Tokenizer}; - -    #[test] -    fn test_attribute_value_syntax() { -        let mut tokenizer = Tokenizer::new( -            "<div empty unquoted=foo single-quoted='foo' double-quoted=\"foo\">", -            DefaultEmitter::default(), -        ) -        .flatten(); -        let Event::Token(Token::StartTag(start_tag)) = tokenizer.next().unwrap() else { -            panic!("expected start tag"); -        }; -        assert_eq!( -            start_tag.attributes.get("empty").unwrap().value_syntax(), -            None -        ); -        assert_eq!( -            start_tag.attributes.get("unquoted").unwrap().value_syntax(), -            Some(AttrValueSyntax::Unquoted) -        ); -        assert_eq!( -            start_tag -                .attributes -                .get("single-quoted") -                .unwrap() -                .value_syntax(), -            Some(AttrValueSyntax::SingleQuoted) -        ); -        assert_eq!( -            start_tag -                .attributes -                .get("double-quoted") -                .unwrap() -                .value_syntax(), -            Some(AttrValueSyntax::DoubleQuoted) -        ); -    } -} @@ -7,6 +7,7 @@  #![doc = concat!("[the LICENSE file]: ", file_url!("LICENSE"))]  #![doc = include_str!("../README.md")] +mod default_emitter;  mod emitter;  mod entities;  mod error; @@ -25,7 +26,8 @@ pub mod offset;  pub mod reader;  pub mod token; -pub use emitter::{DefaultEmitter, Emitter}; +pub use default_emitter::DefaultEmitter; +pub use emitter::Emitter;  pub use error::Error;  pub use naive_parser::NaiveParser;  pub use token::{Comment, Doctype, EndTag, StartTag, Token}; diff --git a/src/naive_parser.rs b/src/naive_parser.rs index f42072a..c305343 100644 --- a/src/naive_parser.rs +++ b/src/naive_parser.rs @@ -1,4 +1,4 @@ -use crate::emitter::DefaultEmitter; +use crate::default_emitter::DefaultEmitter;  use crate::offset::{Offset, Position};  use crate::reader::{IntoReader, Reader};  use crate::tokenizer::CdataAction; | 
