diff options
author | Martin Fischer <martin@push-f.com> | 2021-04-08 12:42:04 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2021-04-08 15:40:48 +0200 |
commit | e0bef0105e0cc64bb610889b6921fd94897431d9 (patch) | |
tree | 4601b8a86778c10b65d232f99c1f5fd4b289c22a /src/tokenizer/mod.rs | |
parent | 8bb20dcdeec57b2109b05351663ec1dba9c65f84 (diff) | |
drop tendril dependency
Diffstat (limited to 'src/tokenizer/mod.rs')
-rw-r--r-- | src/tokenizer/mod.rs | 127 |
1 file changed, 62 insertions, 65 deletions
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index f45c917..eb22b11 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -31,7 +31,6 @@ use std::default::Default; use std::mem::replace; pub use crate::util::buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult}; -use tendril::StrTendril; mod char_ref; mod interface; @@ -49,10 +48,10 @@ pub enum TokenizerResult<Handle> { Script(Handle), } -fn option_push(opt_str: &mut Option<StrTendril>, c: char) { +fn option_push(opt_str: &mut Option<String>, c: char) { match *opt_str { - Some(ref mut s) => s.push_char(c), - None => *opt_str = Some(StrTendril::from_char(c)), + Some(ref mut s) => s.push(c), + None => *opt_str = Some(String::from(c)), } } @@ -132,7 +131,7 @@ pub struct Tokenizer<Sink> { current_tag_kind: TagKind, /// Current tag name. - current_tag_name: StrTendril, + current_tag_name: String, /// Current tag is self-closing? current_tag_self_closing: bool, @@ -141,22 +140,22 @@ pub struct Tokenizer<Sink> { current_tag_attrs: Vec<Attribute>, /// Current attribute name. - current_attr_name: StrTendril, + current_attr_name: String, /// Current attribute value. - current_attr_value: StrTendril, + current_attr_value: String, /// Current comment. - current_comment: StrTendril, + current_comment: String, /// Current doctype token. current_doctype: Doctype, /// Last start tag name, for use in checking "appropriate end tag". - last_start_tag_name: Option<StrTendril>, + last_start_tag_name: Option<String>, /// The "temporary buffer" mentioned in the spec. - temp_buf: StrTendril, + temp_buf: String, /// Record of how many ns we spent in each state, if profiling is enabled. 
state_profile: BTreeMap<states::State, u64>, @@ -173,8 +172,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { pub fn new(sink: Sink, mut opts: TokenizerOpts) -> Tokenizer<Sink> { let start_tag_name = opts .last_start_tag_name - .take() - .map(|s| StrTendril::from(s)); + .take(); let state = opts.initial_state.unwrap_or(states::Data); let discard_bom = opts.discard_bom; Tokenizer { @@ -188,15 +186,15 @@ impl<Sink: TokenSink> Tokenizer<Sink> { ignore_lf: false, discard_bom, current_tag_kind: StartTag, - current_tag_name: StrTendril::new(), + current_tag_name: String::new(), current_tag_self_closing: false, current_tag_attrs: vec![], - current_attr_name: StrTendril::new(), - current_attr_value: StrTendril::new(), - current_comment: StrTendril::new(), + current_attr_name: String::new(), + current_attr_value: String::new(), + current_comment: String::new(), current_doctype: Doctype::new(), last_start_tag_name: start_tag_name, - temp_buf: StrTendril::new(), + temp_buf: String::new(), state_profile: BTreeMap::new(), time_in_sink: 0, current_line: 1, @@ -324,12 +322,12 @@ impl<Sink: TokenSink> Tokenizer<Sink> { pat: &str, eq: fn(&u8, &u8) -> bool, ) -> Option<bool> { - input.push_front(replace(&mut self.temp_buf, StrTendril::new())); + input.push_front(replace(&mut self.temp_buf, String::new())); match input.eat(pat, eq) { None if self.at_eof => Some(false), None => { while let Some(c) = input.next() { - self.temp_buf.push_char(c); + self.temp_buf.push(c); } None }, @@ -398,12 +396,12 @@ impl<Sink: TokenSink> Tokenizer<Sink> { fn emit_char(&mut self, c: char) { self.process_token_and_continue(match c { '\0' => NullCharacterToken, - _ => CharacterTokens(StrTendril::from_char(c)), + _ => CharacterTokens(String::from(c)), }); } // The string must not contain '\0'! 
- fn emit_chars(&mut self, b: StrTendril) { + fn emit_chars(&mut self, b: String) { self.process_token_and_continue(CharacterTokens(b)); } @@ -453,7 +451,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { fn emit_temp_buf(&mut self) { // FIXME: Make sure that clearing on emit is spec-compatible. - let buf = replace(&mut self.temp_buf, StrTendril::new()); + let buf = replace(&mut self.temp_buf, String::new()); self.emit_chars(buf); } @@ -463,7 +461,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { } fn emit_current_comment(&mut self) { - let comment = replace(&mut self.current_comment, StrTendril::new()); + let comment = replace(&mut self.current_comment, String::new()); self.process_token_and_continue(CommentToken(comment)); } @@ -475,7 +473,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { fn create_tag(&mut self, kind: TagKind, c: char) { self.discard_tag(); - self.current_tag_name.push_char(c); + self.current_tag_name.push(c); self.current_tag_kind = kind; } @@ -489,7 +487,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { fn create_attribute(&mut self, c: char) { self.finish_attribute(); - self.current_attr_name.push_char(c); + self.current_attr_name.push(c); } fn finish_attribute(&mut self) { @@ -516,7 +514,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { self.current_attr_name.clear(); self.current_tag_attrs.push(Attribute { name: name, - value: replace(&mut self.current_attr_value, StrTendril::new()), + value: replace(&mut self.current_attr_value, String::new()), }); } } @@ -526,7 +524,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { self.process_token_and_continue(DoctypeToken(doctype)); } - fn doctype_id(&mut self, kind: DoctypeIdKind) -> &mut Option<StrTendril> { + fn doctype_id(&mut self, kind: DoctypeIdKind) -> &mut Option<String> { match kind { Public => &mut self.current_doctype.public_id, System => &mut self.current_doctype.system_id, @@ -537,7 +535,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { let id = self.doctype_id(kind); match *id { Some(ref mut s) => s.clear(), - None => *id 
= Some(StrTendril::new()), + None => *id = Some(String::new()), } } @@ -573,18 +571,18 @@ impl<Sink: TokenSink> Tokenizer<Sink> { macro_rules! shorthand ( ( $me:ident : emit $c:expr ) => ( $me.emit_char($c); ); ( $me:ident : create_tag $kind:ident $c:expr ) => ( $me.create_tag($kind, $c); ); - ( $me:ident : push_tag $c:expr ) => ( $me.current_tag_name.push_char($c); ); + ( $me:ident : push_tag $c:expr ) => ( $me.current_tag_name.push($c); ); ( $me:ident : discard_tag ) => ( $me.discard_tag(); ); ( $me:ident : discard_char $input:expr ) => ( $me.discard_char($input); ); - ( $me:ident : push_temp $c:expr ) => ( $me.temp_buf.push_char($c); ); + ( $me:ident : push_temp $c:expr ) => ( $me.temp_buf.push($c); ); ( $me:ident : emit_temp ) => ( $me.emit_temp_buf(); ); ( $me:ident : clear_temp ) => ( $me.clear_temp_buf(); ); ( $me:ident : create_attr $c:expr ) => ( $me.create_attribute($c); ); - ( $me:ident : push_name $c:expr ) => ( $me.current_attr_name.push_char($c); ); - ( $me:ident : push_value $c:expr ) => ( $me.current_attr_value.push_char($c); ); - ( $me:ident : append_value $c:expr ) => ( $me.current_attr_value.push_tendril($c); ); - ( $me:ident : push_comment $c:expr ) => ( $me.current_comment.push_char($c); ); - ( $me:ident : append_comment $c:expr ) => ( $me.current_comment.push_slice($c); ); + ( $me:ident : push_name $c:expr ) => ( $me.current_attr_name.push($c); ); + ( $me:ident : push_value $c:expr ) => ( $me.current_attr_value.push($c); ); + ( $me:ident : append_value $c:expr ) => ( $me.current_attr_value.push_str($c); ); + ( $me:ident : push_comment $c:expr ) => ( $me.current_comment.push($c); ); + ( $me:ident : append_comment $c:expr ) => ( $me.current_comment.push_str($c); ); ( $me:ident : emit_comment ) => ( $me.emit_current_comment(); ); ( $me:ident : clear_comment ) => ( $me.current_comment.clear(); ); ( $me:ident : create_doctype ) => ( $me.current_doctype = Doctype::new(); ); @@ -1523,7 +1521,6 @@ impl<Sink: TokenSink> Tokenizer<Sink> { 
#[allow(non_snake_case)] mod test { use super::option_push; // private items - use tendril::{SliceExt, StrTendril}; use super::{TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts}; @@ -1539,7 +1536,7 @@ mod test { // vector is a collection of the line numbers that each token is on. struct LinesMatch { tokens: Vec<Token>, - current_str: StrTendril, + current_str: String, lines: Vec<(Token, u64)>, } @@ -1547,7 +1544,7 @@ mod test { fn new() -> LinesMatch { LinesMatch { tokens: vec![], - current_str: StrTendril::new(), + current_str: String::new(), lines: vec![], } } @@ -1559,7 +1556,7 @@ mod test { fn finish_str(&mut self) { if self.current_str.len() > 0 { - let s = replace(&mut self.current_str, StrTendril::new()); + let s = replace(&mut self.current_str, String::new()); self.tokens.push(CharacterTokens(s)); } } @@ -1575,11 +1572,11 @@ mod test { ) -> TokenSinkResult<Self::Handle> { match token { CharacterTokens(b) => { - self.current_str.push_slice(&b); + self.current_str.push_str(&b); }, NullCharacterToken => { - self.current_str.push_char('\0'); + self.current_str.push('\0'); }, ParseError(_) => { @@ -1610,7 +1607,7 @@ mod test { // Take in tokens, process them, and return vector with line // numbers that each token is on - fn tokenize(input: Vec<StrTendril>, opts: TokenizerOpts) -> Vec<(Token, u64)> { + fn tokenize(input: Vec<String>, opts: TokenizerOpts) -> Vec<(Token, u64)> { let sink = LinesMatch::new(); let mut tok = Tokenizer::new(sink, opts); let mut buffer = BufferQueue::new(); @@ -1623,7 +1620,7 @@ mod test { } // Create a tag token - fn create_tag(token: StrTendril, tagkind: TagKind) -> Token { + fn create_tag(token: String, tagkind: TagKind) -> Token { let name = token; let token = TagToken(Tag { kind: tagkind, @@ -1636,23 +1633,23 @@ mod test { #[test] fn push_to_None_gives_singleton() { - let mut s: Option<StrTendril> = None; + let mut s: Option<String> = None; option_push(&mut s, 'x'); - assert_eq!(s, Some("x".to_tendril())); + assert_eq!(s, 
Some("x".into())); } #[test] fn push_to_empty_appends() { - let mut s: Option<StrTendril> = Some(StrTendril::new()); + let mut s: Option<String> = Some(String::new()); option_push(&mut s, 'x'); - assert_eq!(s, Some("x".to_tendril())); + assert_eq!(s, Some("x".into())); } #[test] fn push_to_nonempty_appends() { - let mut s: Option<StrTendril> = Some(StrTendril::from_slice("y")); + let mut s: Option<String> = Some(String::from("y")); option_push(&mut s, 'x'); - assert_eq!(s, Some("yx".to_tendril())); + assert_eq!(s, Some("yx".into())); } #[test] @@ -1665,16 +1662,16 @@ mod test { last_start_tag_name: None, }; let vector = vec![ - StrTendril::from("<a>\n"), - StrTendril::from("<b>\n"), - StrTendril::from("</b>\n"), - StrTendril::from("</a>\n"), + String::from("<a>\n"), + String::from("<b>\n"), + String::from("</b>\n"), + String::from("</a>\n"), ]; let expected = vec![ - (create_tag(StrTendril::from("a"), StartTag), 1), - (create_tag(StrTendril::from("b"), StartTag), 2), - (create_tag(StrTendril::from("b"), EndTag), 3), - (create_tag(StrTendril::from("a"), EndTag), 4), + (create_tag(String::from("a"), StartTag), 1), + (create_tag(String::from("b"), StartTag), 2), + (create_tag(String::from("b"), EndTag), 3), + (create_tag(String::from("a"), EndTag), 4), ]; let results = tokenize(vector, opts); assert_eq!(results, expected); @@ -1690,16 +1687,16 @@ mod test { last_start_tag_name: None, }; let vector = vec![ - StrTendril::from("<a>\r\n"), - StrTendril::from("<b>\r\n"), - StrTendril::from("</b>\r\n"), - StrTendril::from("</a>\r\n"), + String::from("<a>\r\n"), + String::from("<b>\r\n"), + String::from("</b>\r\n"), + String::from("</a>\r\n"), ]; let expected = vec![ - (create_tag(StrTendril::from("a"), StartTag), 1), - (create_tag(StrTendril::from("b"), StartTag), 2), - (create_tag(StrTendril::from("b"), EndTag), 3), - (create_tag(StrTendril::from("a"), EndTag), 4), + (create_tag(String::from("a"), StartTag), 1), + (create_tag(String::from("b"), StartTag), 2), + 
(create_tag(String::from("b"), EndTag), 3), + (create_tag(String::from("a"), EndTag), 4), ]; let results = tokenize(vector, opts); assert_eq!(results, expected); |