diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib.rs | 1 | ||||
| -rw-r--r-- | src/spans.rs | 265 | 
2 files changed, 266 insertions, 0 deletions
| @@ -9,6 +9,7 @@ mod error;  mod machine;  mod never;  mod reader; +mod spans;  mod tokenizer;  mod utils; diff --git a/src/spans.rs b/src/spans.rs new file mode 100644 index 0000000..ea3409c --- /dev/null +++ b/src/spans.rs @@ -0,0 +1,265 @@ +use std::{ +    collections::{BTreeSet, VecDeque}, +    marker::PhantomData, +    mem, +}; + +use crate::{Doctype, Emitter, EndTag, Error, StartTag, Token}; + +/// The default implementation of [`crate::Emitter`], used to produce ("emit") tokens. +pub struct DefaultEmitter<S, R> { +    current_characters: String, +    current_token: Option<Token<S>>, +    last_start_tag: String, +    current_attribute: Option<(String, String)>, +    seen_attributes: BTreeSet<String>, +    emitted_tokens: VecDeque<Token<S>>, +    reader: PhantomData<R>, +} + +impl<S, R> Default for DefaultEmitter<S, R> { +    fn default() -> Self { +        DefaultEmitter { +            current_characters: String::new(), +            current_token: None, +            last_start_tag: String::new(), +            current_attribute: None, +            seen_attributes: BTreeSet::new(), +            emitted_tokens: VecDeque::new(), +            reader: PhantomData::default(), +        } +    } +} + +impl<R> DefaultEmitter<(), R> { +    fn emit_token(&mut self, token: Token<()>) { +        self.flush_current_characters(); +        self.emitted_tokens.push_front(token); +    } + +    fn flush_current_attribute(&mut self) { +        if let Some((k, v)) = self.current_attribute.take() { +            match self.current_token { +                Some(Token::StartTag(ref mut tag)) => { +                    let mut error = None; +                    tag.attributes +                        .entry(k) +                        .and_modify(|_| { +                            error = Some(Error::DuplicateAttribute); +                        }) +                        .or_insert(v); + +                    if let Some(e) = error { +                        self.emit_error(e); +                    } +                } +                Some(Token::EndTag(_)) => { +                    if !self.seen_attributes.insert(k) { +                        self.emit_error(Error::DuplicateAttribute); +                    } +                } +                _ => { +                    debug_assert!(false); +                } +            } +        } +    } + +    fn flush_current_characters(&mut self) { +        if self.current_characters.is_empty() { +            return; +        } + +        let s = mem::take(&mut self.current_characters); +        self.emit_token(Token::String(s)); +    } +} + +impl<R> Emitter<R> for DefaultEmitter<(), R> { +    type Token = Token<()>; + +    fn set_last_start_tag(&mut self, last_start_tag: Option<&str>) { +        self.last_start_tag.clear(); +        self.last_start_tag +            .push_str(last_start_tag.unwrap_or_default()); +    } + +    fn emit_eof(&mut self) { +        self.flush_current_characters(); +    } + +    fn emit_error(&mut self, error: Error) { +        // bypass character flushing in self.emit_token: we don't need the error location to be +        // that exact +        self.emitted_tokens.push_front(Token::Error(error)); +    } + +    fn pop_token(&mut self) -> Option<Self::Token> { +        self.emitted_tokens.pop_back() +    } + +    fn emit_string(&mut self, s: &str) { +        self.current_characters.push_str(s); +    } + +    fn init_start_tag(&mut self, _reader: &R) { +        self.current_token = Some(Token::StartTag(Default::default())); +    } +    fn init_end_tag(&mut self, _reader: &R) { +        self.current_token = Some(Token::EndTag(Default::default())); +        self.seen_attributes.clear(); +    } + +    fn init_comment(&mut self, _reader: &R) { +        self.current_token = Some(Token::Comment(String::new())); +    } +    fn emit_current_tag(&mut self) { +        self.flush_current_attribute(); +        let mut token = self.current_token.take().unwrap(); +        match token { +            Token::EndTag(_) => { +                if !self.seen_attributes.is_empty() { +                    self.emit_error(Error::EndTagWithAttributes); +                } +                self.seen_attributes.clear(); +            } +            Token::StartTag(ref mut _tag) => { +                self.set_last_start_tag(Some(&_tag.name)); +            } +            _ => debug_assert!(false), +        } +        self.emit_token(token); +    } +    fn emit_current_comment(&mut self) { +        let comment = self.current_token.take().unwrap(); +        debug_assert!(matches!(comment, Token::Comment(_))); +        self.emit_token(comment); +    } + +    fn emit_current_doctype(&mut self) { +        let doctype = self.current_token.take().unwrap(); +        debug_assert!(matches!(doctype, Token::Doctype(_))); +        self.emit_token(doctype); +    } + +    fn set_self_closing(&mut self) { +        let tag = self.current_token.as_mut().unwrap(); +        match tag { +            Token::StartTag(StartTag { +                ref mut self_closing, +                .. +            }) => { +                *self_closing = true; +            } +            Token::EndTag(_) => { +                self.emit_error(Error::EndTagWithTrailingSolidus); +            } +            _ => { +                debug_assert!(false); +            } +        } +    } +    fn set_force_quirks(&mut self) { +        match self.current_token { +            Some(Token::Doctype(ref mut doctype)) => doctype.force_quirks = true, +            _ => debug_assert!(false), +        } +    } +    fn push_tag_name(&mut self, s: &str) { +        match self.current_token { +            Some(Token::StartTag(StartTag { ref mut name, .. })) => { +                name.push_str(s); +            } +            Some(Token::EndTag(EndTag { ref mut name, .. })) => { +                name.push_str(s); +            } +            _ => debug_assert!(false), +        } +    } + +    fn push_comment(&mut self, s: &str) { +        match self.current_token { +            Some(Token::Comment(ref mut data)) => data.push_str(s), +            _ => debug_assert!(false), +        } +    } + +    fn push_doctype_name(&mut self, s: &str) { +        match self.current_token { +            Some(Token::Doctype(ref mut doctype)) => doctype.name.push_str(s), +            _ => debug_assert!(false), +        } +    } +    fn init_doctype(&mut self, _reader: &R) { +        self.current_token = Some(Token::Doctype(Doctype { +            name: String::new(), +            force_quirks: false, +            public_identifier: None, +            system_identifier: None, +        })); +    } + +    fn init_attribute(&mut self, _reader: &R) { +        self.flush_current_attribute(); +        self.current_attribute = Some((String::new(), String::new())); +    } +    fn push_attribute_name(&mut self, s: &str) { +        self.current_attribute.as_mut().unwrap().0.push_str(s); +    } +    fn push_attribute_value(&mut self, s: &str) { +        self.current_attribute.as_mut().unwrap().1.push_str(s); +    } +    fn set_doctype_public_identifier(&mut self, value: &str) { +        if let Some(Token::Doctype(Doctype { +            ref mut public_identifier, +            .. +        })) = self.current_token +        { +            *public_identifier = Some(value.to_owned()); +        } else { +            debug_assert!(false); +        } +    } +    fn set_doctype_system_identifier(&mut self, value: &str) { +        if let Some(Token::Doctype(Doctype { +            ref mut system_identifier, +            .. +        })) = self.current_token +        { +            *system_identifier = Some(value.to_owned()); +        } else { +            debug_assert!(false); +        } +    } +    fn push_doctype_public_identifier(&mut self, s: &str) { +        if let Some(Token::Doctype(Doctype { +            public_identifier: Some(ref mut id), +            .. +        })) = self.current_token +        { +            id.push_str(s); +        } else { +            debug_assert!(false); +        } +    } +    fn push_doctype_system_identifier(&mut self, s: &str) { +        if let Some(Token::Doctype(Doctype { +            system_identifier: Some(ref mut id), +            .. +        })) = self.current_token +        { +            id.push_str(s); +        } else { +            debug_assert!(false); +        } +    } + +    fn current_is_appropriate_end_tag_token(&mut self) -> bool { +        match self.current_token { +            Some(Token::EndTag(ref tag)) => { +                !self.last_start_tag.is_empty() && self.last_start_tag == tag.name +            } +            _ => false, +        } +    } +} | 
