diff options
Diffstat (limited to 'src/tokenizer')
-rw-r--r-- | src/tokenizer/char_ref/mod.rs | 21 | ||||
-rw-r--r-- | src/tokenizer/interface.rs | 17 | ||||
-rw-r--r-- | src/tokenizer/mod.rs | 127 |
3 files changed, 80 insertions, 85 deletions
diff --git a/src/tokenizer/char_ref/mod.rs b/src/tokenizer/char_ref/mod.rs index 484a9e1..6daeb13 100644 --- a/src/tokenizer/char_ref/mod.rs +++ b/src/tokenizer/char_ref/mod.rs @@ -8,7 +8,6 @@ // except according to those terms. use super::{TokenSink, Tokenizer}; -use tendril::StrTendril; use crate::util::buffer_queue::BufferQueue; use crate::util::str::is_ascii_alnum; @@ -55,7 +54,7 @@ pub struct CharRefTokenizer { seen_digit: bool, hex_marker: Option<char>, - name_buf_opt: Option<StrTendril>, + name_buf_opt: Option<String>, name_match: Option<(u32, u32)>, name_len: usize, } @@ -84,13 +83,13 @@ impl CharRefTokenizer { self.result.expect("get_result called before done") } - fn name_buf(&self) -> &StrTendril { + fn name_buf(&self) -> &str { self.name_buf_opt .as_ref() .expect("name_buf missing in named character reference") } - fn name_buf_mut(&mut self) -> &mut StrTendril { + fn name_buf_mut(&mut self) -> &mut String { self.name_buf_opt .as_mut() .expect("name_buf missing in named character reference") @@ -189,7 +188,7 @@ impl CharRefTokenizer { _ => { self.state = Named; - self.name_buf_opt = Some(StrTendril::new()); + self.name_buf_opt = Some(String::new()); Progress }, } @@ -265,9 +264,9 @@ impl CharRefTokenizer { tokenizer: &mut Tokenizer<Sink>, input: &mut BufferQueue, ) -> Status { - let mut unconsume = StrTendril::from_char('#'); + let mut unconsume = String::from('#'); match self.hex_marker { - Some(c) => unconsume.push_char(c), + Some(c) => unconsume.push(c), None => (), } @@ -316,7 +315,7 @@ impl CharRefTokenizer { input: &mut BufferQueue, ) -> Status { let c = unwrap_or_return!(tokenizer.get_char(input), Stuck); - self.name_buf_mut().push_char(c); + self.name_buf_mut().push(c); self.finish_named(tokenizer, input, Some(c)) } @@ -411,7 +410,7 @@ impl CharRefTokenizer { self.unconsume_name(input); self.finish_none() } else { - input.push_front(StrTendril::from_slice(&self.name_buf()[name_len..])); + input.push_front(String::from(&self.name_buf()[name_len..])); self.result = Some(CharRef { chars: [from_u32(c1).unwrap(), from_u32(c2).unwrap()], num_chars: if c2 == 0 { 1 } else { 2 }, @@ -428,7 +427,7 @@ impl CharRefTokenizer { input: &mut BufferQueue, ) -> Status { let c = unwrap_or_return!(tokenizer.get_char(input), Stuck); - self.name_buf_mut().push_char(c); + self.name_buf_mut().push(c); match c { _ if is_ascii_alnum(c) => return Progress, ';' => self.emit_name_error(tokenizer), @@ -462,7 +461,7 @@ impl CharRefTokenizer { }, Octothorpe => { - input.push_front(StrTendril::from_slice("#")); + input.push_front(String::from("#")); tokenizer.emit_error(Borrowed("EOF after '#' in character reference")); self.finish_none(); }, diff --git a/src/tokenizer/interface.rs b/src/tokenizer/interface.rs index c331a0e..dfd9a9f 100644 --- a/src/tokenizer/interface.rs +++ b/src/tokenizer/interface.rs @@ -7,7 +7,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use tendril::StrTendril; use crate::tokenizer::states; use std::borrow::Cow; @@ -19,9 +18,9 @@ pub use self::Token::{EOFToken, NullCharacterToken, ParseError}; // FIXME: already exists in Servo DOM #[derive(PartialEq, Eq, Clone, Debug)] pub struct Doctype { - pub name: Option<StrTendril>, - pub public_id: Option<StrTendril>, - pub system_id: Option<StrTendril>, + pub name: Option<String>, + pub public_id: Option<String>, + pub system_id: Option<String>, pub force_quirks: bool, } @@ -51,16 +50,16 @@ pub enum TagKind { #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug)] pub struct Attribute { /// The name of the attribute (e.g. the `class` in `<div class="test">`) - pub name: StrTendril, + pub name: String, /// The value of the attribute (e.g. the `"test"` in `<div class="test">`) - pub value: StrTendril, + pub value: String, } /// A tag token. #[derive(PartialEq, Eq, Clone, Debug)] pub struct Tag { pub kind: TagKind, - pub name: StrTendril, + pub name: String, pub self_closing: bool, pub attrs: Vec<Attribute>, } @@ -86,8 +85,8 @@ impl Tag { pub enum Token { DoctypeToken(Doctype), TagToken(Tag), - CommentToken(StrTendril), - CharacterTokens(StrTendril), + CommentToken(String), + CharacterTokens(String), NullCharacterToken, EOFToken, ParseError(Cow<'static, str>), diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index f45c917..eb22b11 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -31,7 +31,6 @@ use std::default::Default; use std::mem::replace; pub use crate::util::buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult}; -use tendril::StrTendril; mod char_ref; mod interface; @@ -49,10 +48,10 @@ pub enum TokenizerResult<Handle> { Script(Handle), } -fn option_push(opt_str: &mut Option<StrTendril>, c: char) { +fn option_push(opt_str: &mut Option<String>, c: char) { match *opt_str { - Some(ref mut s) => s.push_char(c), - None => *opt_str = Some(StrTendril::from_char(c)), + Some(ref mut s) => s.push(c), + None => *opt_str = Some(String::from(c)), } } @@ -132,7 +131,7 @@ pub struct Tokenizer<Sink> { current_tag_kind: TagKind, /// Current tag name. - current_tag_name: StrTendril, + current_tag_name: String, /// Current tag is self-closing? current_tag_self_closing: bool, @@ -141,22 +140,22 @@ pub struct Tokenizer<Sink> { current_tag_attrs: Vec<Attribute>, /// Current attribute name. - current_attr_name: StrTendril, + current_attr_name: String, /// Current attribute value. - current_attr_value: StrTendril, + current_attr_value: String, /// Current comment. - current_comment: StrTendril, + current_comment: String, /// Current doctype token. current_doctype: Doctype, /// Last start tag name, for use in checking "appropriate end tag". - last_start_tag_name: Option<StrTendril>, + last_start_tag_name: Option<String>, /// The "temporary buffer" mentioned in the spec. - temp_buf: StrTendril, + temp_buf: String, /// Record of how many ns we spent in each state, if profiling is enabled. state_profile: BTreeMap<states::State, u64>, @@ -173,8 +172,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { pub fn new(sink: Sink, mut opts: TokenizerOpts) -> Tokenizer<Sink> { let start_tag_name = opts .last_start_tag_name - .take() - .map(|s| StrTendril::from(s)); + .take(); let state = opts.initial_state.unwrap_or(states::Data); let discard_bom = opts.discard_bom; Tokenizer { @@ -188,15 +186,15 @@ impl<Sink: TokenSink> Tokenizer<Sink> { ignore_lf: false, discard_bom, current_tag_kind: StartTag, - current_tag_name: StrTendril::new(), + current_tag_name: String::new(), current_tag_self_closing: false, current_tag_attrs: vec![], - current_attr_name: StrTendril::new(), - current_attr_value: StrTendril::new(), - current_comment: StrTendril::new(), + current_attr_name: String::new(), + current_attr_value: String::new(), + current_comment: String::new(), current_doctype: Doctype::new(), last_start_tag_name: start_tag_name, - temp_buf: StrTendril::new(), + temp_buf: String::new(), state_profile: BTreeMap::new(), time_in_sink: 0, current_line: 1, @@ -324,12 +322,12 @@ impl<Sink: TokenSink> Tokenizer<Sink> { pat: &str, eq: fn(&u8, &u8) -> bool, ) -> Option<bool> { - input.push_front(replace(&mut self.temp_buf, StrTendril::new())); + input.push_front(replace(&mut self.temp_buf, String::new())); match input.eat(pat, eq) { None if self.at_eof => Some(false), None => { while let Some(c) = input.next() { - self.temp_buf.push_char(c); + self.temp_buf.push(c); } None }, @@ -398,12 +396,12 @@ impl<Sink: TokenSink> Tokenizer<Sink> { fn emit_char(&mut self, c: char) { self.process_token_and_continue(match c { '\0' => NullCharacterToken, - _ => CharacterTokens(StrTendril::from_char(c)), + _ => CharacterTokens(String::from(c)), }); } // The string must not contain '\0'! - fn emit_chars(&mut self, b: StrTendril) { + fn emit_chars(&mut self, b: String) { self.process_token_and_continue(CharacterTokens(b)); } @@ -453,7 +451,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { fn emit_temp_buf(&mut self) { // FIXME: Make sure that clearing on emit is spec-compatible. - let buf = replace(&mut self.temp_buf, StrTendril::new()); + let buf = replace(&mut self.temp_buf, String::new()); self.emit_chars(buf); } @@ -463,7 +461,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { } fn emit_current_comment(&mut self) { - let comment = replace(&mut self.current_comment, StrTendril::new()); + let comment = replace(&mut self.current_comment, String::new()); self.process_token_and_continue(CommentToken(comment)); } @@ -475,7 +473,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { fn create_tag(&mut self, kind: TagKind, c: char) { self.discard_tag(); - self.current_tag_name.push_char(c); + self.current_tag_name.push(c); self.current_tag_kind = kind; } @@ -489,7 +487,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { fn create_attribute(&mut self, c: char) { self.finish_attribute(); - self.current_attr_name.push_char(c); + self.current_attr_name.push(c); } fn finish_attribute(&mut self) { @@ -516,7 +514,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { self.current_attr_name.clear(); self.current_tag_attrs.push(Attribute { name: name, - value: replace(&mut self.current_attr_value, StrTendril::new()), + value: replace(&mut self.current_attr_value, String::new()), }); } } @@ -526,7 +524,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { self.process_token_and_continue(DoctypeToken(doctype)); } - fn doctype_id(&mut self, kind: DoctypeIdKind) -> &mut Option<StrTendril> { + fn doctype_id(&mut self, kind: DoctypeIdKind) -> &mut Option<String> { match kind { Public => &mut self.current_doctype.public_id, System => &mut self.current_doctype.system_id, @@ -537,7 +535,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> { let id = self.doctype_id(kind); match *id { Some(ref mut s) => s.clear(), - None => *id = Some(StrTendril::new()), + None => *id = Some(String::new()), } } @@ -573,18 +571,18 @@ impl<Sink: TokenSink> Tokenizer<Sink> { macro_rules! shorthand ( ( $me:ident : emit $c:expr ) => ( $me.emit_char($c); ); ( $me:ident : create_tag $kind:ident $c:expr ) => ( $me.create_tag($kind, $c); ); - ( $me:ident : push_tag $c:expr ) => ( $me.current_tag_name.push_char($c); ); + ( $me:ident : push_tag $c:expr ) => ( $me.current_tag_name.push($c); ); ( $me:ident : discard_tag ) => ( $me.discard_tag(); ); ( $me:ident : discard_char $input:expr ) => ( $me.discard_char($input); ); - ( $me:ident : push_temp $c:expr ) => ( $me.temp_buf.push_char($c); ); + ( $me:ident : push_temp $c:expr ) => ( $me.temp_buf.push($c); ); ( $me:ident : emit_temp ) => ( $me.emit_temp_buf(); ); ( $me:ident : clear_temp ) => ( $me.clear_temp_buf(); ); ( $me:ident : create_attr $c:expr ) => ( $me.create_attribute($c); ); - ( $me:ident : push_name $c:expr ) => ( $me.current_attr_name.push_char($c); ); - ( $me:ident : push_value $c:expr ) => ( $me.current_attr_value.push_char($c); ); - ( $me:ident : append_value $c:expr ) => ( $me.current_attr_value.push_tendril($c); ); - ( $me:ident : push_comment $c:expr ) => ( $me.current_comment.push_char($c); ); - ( $me:ident : append_comment $c:expr ) => ( $me.current_comment.push_slice($c); ); + ( $me:ident : push_name $c:expr ) => ( $me.current_attr_name.push($c); ); + ( $me:ident : push_value $c:expr ) => ( $me.current_attr_value.push($c); ); + ( $me:ident : append_value $c:expr ) => ( $me.current_attr_value.push_str($c); ); + ( $me:ident : push_comment $c:expr ) => ( $me.current_comment.push($c); ); + ( $me:ident : append_comment $c:expr ) => ( $me.current_comment.push_str($c); ); ( $me:ident : emit_comment ) => ( $me.emit_current_comment(); ); ( $me:ident : clear_comment ) => ( $me.current_comment.clear(); ); ( $me:ident : create_doctype ) => ( $me.current_doctype = Doctype::new(); ); @@ -1523,7 +1521,6 @@ impl<Sink: TokenSink> Tokenizer<Sink> { #[allow(non_snake_case)] mod test { use super::option_push; // private items - use tendril::{SliceExt, StrTendril}; use super::{TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts}; @@ -1539,7 +1536,7 @@ mod test { // vector is a collection of the line numbers that each token is on. struct LinesMatch { tokens: Vec<Token>, - current_str: StrTendril, + current_str: String, lines: Vec<(Token, u64)>, } @@ -1547,7 +1544,7 @@ mod test { fn new() -> LinesMatch { LinesMatch { tokens: vec![], - current_str: StrTendril::new(), + current_str: String::new(), lines: vec![], } } @@ -1559,7 +1556,7 @@ mod test { fn finish_str(&mut self) { if self.current_str.len() > 0 { - let s = replace(&mut self.current_str, StrTendril::new()); + let s = replace(&mut self.current_str, String::new()); self.tokens.push(CharacterTokens(s)); } } @@ -1575,11 +1572,11 @@ mod test { ) -> TokenSinkResult<Self::Handle> { match token { CharacterTokens(b) => { - self.current_str.push_slice(&b); + self.current_str.push_str(&b); }, NullCharacterToken => { - self.current_str.push_char('\0'); + self.current_str.push('\0'); }, ParseError(_) => { @@ -1610,7 +1607,7 @@ mod test { // Take in tokens, process them, and return vector with line // numbers that each token is on - fn tokenize(input: Vec<StrTendril>, opts: TokenizerOpts) -> Vec<(Token, u64)> { + fn tokenize(input: Vec<String>, opts: TokenizerOpts) -> Vec<(Token, u64)> { let sink = LinesMatch::new(); let mut tok = Tokenizer::new(sink, opts); let mut buffer = BufferQueue::new(); @@ -1623,7 +1620,7 @@ mod test { } // Create a tag token - fn create_tag(token: StrTendril, tagkind: TagKind) -> Token { + fn create_tag(token: String, tagkind: TagKind) -> Token { let name = token; let token = TagToken(Tag { kind: tagkind, @@ -1636,23 +1633,23 @@ mod test { #[test] fn push_to_None_gives_singleton() { - let mut s: Option<StrTendril> = None; + let mut s: Option<String> = None; option_push(&mut s, 'x'); - assert_eq!(s, Some("x".to_tendril())); + assert_eq!(s, Some("x".into())); } #[test] fn push_to_empty_appends() { - let mut s: Option<StrTendril> = Some(StrTendril::new()); + let mut s: Option<String> = Some(String::new()); option_push(&mut s, 'x'); - assert_eq!(s, Some("x".to_tendril())); + assert_eq!(s, Some("x".into())); } #[test] fn push_to_nonempty_appends() { - let mut s: Option<StrTendril> = Some(StrTendril::from_slice("y")); + let mut s: Option<String> = Some(String::from("y")); option_push(&mut s, 'x'); - assert_eq!(s, Some("yx".to_tendril())); + assert_eq!(s, Some("yx".into())); } #[test] @@ -1665,16 +1662,16 @@ mod test { last_start_tag_name: None, }; let vector = vec![ - StrTendril::from("<a>\n"), - StrTendril::from("<b>\n"), - StrTendril::from("</b>\n"), - StrTendril::from("</a>\n"), + String::from("<a>\n"), + String::from("<b>\n"), + String::from("</b>\n"), + String::from("</a>\n"), ]; let expected = vec![ - (create_tag(StrTendril::from("a"), StartTag), 1), - (create_tag(StrTendril::from("b"), StartTag), 2), - (create_tag(StrTendril::from("b"), EndTag), 3), - (create_tag(StrTendril::from("a"), EndTag), 4), + (create_tag(String::from("a"), StartTag), 1), + (create_tag(String::from("b"), StartTag), 2), + (create_tag(String::from("b"), EndTag), 3), + (create_tag(String::from("a"), EndTag), 4), ]; let results = tokenize(vector, opts); assert_eq!(results, expected); @@ -1690,16 +1687,16 @@ mod test { last_start_tag_name: None, }; let vector = vec![ - StrTendril::from("<a>\r\n"), - StrTendril::from("<b>\r\n"), - StrTendril::from("</b>\r\n"), - StrTendril::from("</a>\r\n"), + String::from("<a>\r\n"), + String::from("<b>\r\n"), + String::from("</b>\r\n"), + String::from("</a>\r\n"), ]; let expected = vec![ - (create_tag(StrTendril::from("a"), StartTag), 1), - (create_tag(StrTendril::from("b"), StartTag), 2), - (create_tag(StrTendril::from("b"), EndTag), 3), - (create_tag(StrTendril::from("a"), EndTag), 4), + (create_tag(String::from("a"), StartTag), 1), + (create_tag(String::from("b"), StartTag), 2), + (create_tag(String::from("b"), EndTag), 3), + (create_tag(String::from("a"), EndTag), 4), ]; let results = tokenize(vector, opts); assert_eq!(results, expected); |