diff options
Diffstat (limited to 'src/tokenizer')
| -rw-r--r-- | src/tokenizer/char_ref/mod.rs | 21 | ||||
| -rw-r--r-- | src/tokenizer/interface.rs | 17 | ||||
| -rw-r--r-- | src/tokenizer/mod.rs | 127 | 
3 files changed, 80 insertions, 85 deletions
| diff --git a/src/tokenizer/char_ref/mod.rs b/src/tokenizer/char_ref/mod.rs index 484a9e1..6daeb13 100644 --- a/src/tokenizer/char_ref/mod.rs +++ b/src/tokenizer/char_ref/mod.rs @@ -8,7 +8,6 @@  // except according to those terms.  use super::{TokenSink, Tokenizer}; -use tendril::StrTendril;  use crate::util::buffer_queue::BufferQueue;  use crate::util::str::is_ascii_alnum; @@ -55,7 +54,7 @@ pub struct CharRefTokenizer {      seen_digit: bool,      hex_marker: Option<char>, -    name_buf_opt: Option<StrTendril>, +    name_buf_opt: Option<String>,      name_match: Option<(u32, u32)>,      name_len: usize,  } @@ -84,13 +83,13 @@ impl CharRefTokenizer {          self.result.expect("get_result called before done")      } -    fn name_buf(&self) -> &StrTendril { +    fn name_buf(&self) -> &str {          self.name_buf_opt              .as_ref()              .expect("name_buf missing in named character reference")      } -    fn name_buf_mut(&mut self) -> &mut StrTendril { +    fn name_buf_mut(&mut self) -> &mut String {          self.name_buf_opt              .as_mut()              .expect("name_buf missing in named character reference") @@ -189,7 +188,7 @@ impl CharRefTokenizer {              _ => {                  self.state = Named; -                self.name_buf_opt = Some(StrTendril::new()); +                self.name_buf_opt = Some(String::new());                  Progress              },          } @@ -265,9 +264,9 @@ impl CharRefTokenizer {          tokenizer: &mut Tokenizer<Sink>,          input: &mut BufferQueue,      ) -> Status { -        let mut unconsume = StrTendril::from_char('#'); +        let mut unconsume = String::from('#');          match self.hex_marker { -            Some(c) => unconsume.push_char(c), +            Some(c) => unconsume.push(c),              None => (),          } @@ -316,7 +315,7 @@ impl CharRefTokenizer {          input: &mut BufferQueue,      ) -> Status {          let c = unwrap_or_return!(tokenizer.get_char(input), Stuck); -        self.name_buf_mut().push_char(c); +        self.name_buf_mut().push(c);          self.finish_named(tokenizer, input, Some(c))      } @@ -411,7 +410,7 @@ impl CharRefTokenizer {                      self.unconsume_name(input);                      self.finish_none()                  } else { -                    input.push_front(StrTendril::from_slice(&self.name_buf()[name_len..])); +                    input.push_front(String::from(&self.name_buf()[name_len..]));                      self.result = Some(CharRef {                          chars: [from_u32(c1).unwrap(), from_u32(c2).unwrap()],                          num_chars: if c2 == 0 { 1 } else { 2 }, @@ -428,7 +427,7 @@ impl CharRefTokenizer {          input: &mut BufferQueue,      ) -> Status {          let c = unwrap_or_return!(tokenizer.get_char(input), Stuck); -        self.name_buf_mut().push_char(c); +        self.name_buf_mut().push(c);          match c {              _ if is_ascii_alnum(c) => return Progress,              ';' => self.emit_name_error(tokenizer), @@ -462,7 +461,7 @@ impl CharRefTokenizer {                  },                  Octothorpe => { -                    input.push_front(StrTendril::from_slice("#")); +                    input.push_front(String::from("#"));                      tokenizer.emit_error(Borrowed("EOF after '#' in character reference"));                      self.finish_none();                  }, diff --git a/src/tokenizer/interface.rs b/src/tokenizer/interface.rs index c331a0e..dfd9a9f 100644 --- a/src/tokenizer/interface.rs +++ b/src/tokenizer/interface.rs @@ -7,7 +7,6 @@  // option. This file may not be copied, modified, or distributed  // except according to those terms. -use tendril::StrTendril;  use crate::tokenizer::states;  use std::borrow::Cow; @@ -19,9 +18,9 @@ pub use self::Token::{EOFToken, NullCharacterToken, ParseError};  // FIXME: already exists in Servo DOM  #[derive(PartialEq, Eq, Clone, Debug)]  pub struct Doctype { -    pub name: Option<StrTendril>, -    pub public_id: Option<StrTendril>, -    pub system_id: Option<StrTendril>, +    pub name: Option<String>, +    pub public_id: Option<String>, +    pub system_id: Option<String>,      pub force_quirks: bool,  } @@ -51,16 +50,16 @@ pub enum TagKind {  #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug)]  pub struct Attribute {      /// The name of the attribute (e.g. the `class` in `<div class="test">`) -    pub name: StrTendril, +    pub name: String,      /// The value of the attribute (e.g. the `"test"` in `<div class="test">`) -    pub value: StrTendril, +    pub value: String,  }  /// A tag token.  #[derive(PartialEq, Eq, Clone, Debug)]  pub struct Tag {      pub kind: TagKind, -    pub name: StrTendril, +    pub name: String,      pub self_closing: bool,      pub attrs: Vec<Attribute>,  } @@ -86,8 +85,8 @@ impl Tag {  pub enum Token {      DoctypeToken(Doctype),      TagToken(Tag), -    CommentToken(StrTendril), -    CharacterTokens(StrTendril), +    CommentToken(String), +    CharacterTokens(String),      NullCharacterToken,      EOFToken,      ParseError(Cow<'static, str>), diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index f45c917..eb22b11 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -31,7 +31,6 @@ use std::default::Default;  use std::mem::replace;  pub use crate::util::buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult}; -use tendril::StrTendril;  mod char_ref;  mod interface; @@ -49,10 +48,10 @@ pub enum TokenizerResult<Handle> {      Script(Handle),  } -fn option_push(opt_str: &mut Option<StrTendril>, c: char) { +fn option_push(opt_str: &mut Option<String>, c: char) {      match *opt_str { -        Some(ref mut s) => s.push_char(c), -        None => *opt_str = Some(StrTendril::from_char(c)), +        Some(ref mut s) => s.push(c), +        None => *opt_str = Some(String::from(c)),      }  } @@ -132,7 +131,7 @@ pub struct Tokenizer<Sink> {      current_tag_kind: TagKind,      /// Current tag name. -    current_tag_name: StrTendril, +    current_tag_name: String,      /// Current tag is self-closing?      current_tag_self_closing: bool, @@ -141,22 +140,22 @@ pub struct Tokenizer<Sink> {      current_tag_attrs: Vec<Attribute>,      /// Current attribute name. -    current_attr_name: StrTendril, +    current_attr_name: String,      /// Current attribute value. -    current_attr_value: StrTendril, +    current_attr_value: String,      /// Current comment. -    current_comment: StrTendril, +    current_comment: String,      /// Current doctype token.      current_doctype: Doctype,      /// Last start tag name, for use in checking "appropriate end tag". -    last_start_tag_name: Option<StrTendril>, +    last_start_tag_name: Option<String>,      /// The "temporary buffer" mentioned in the spec. -    temp_buf: StrTendril, +    temp_buf: String,      /// Record of how many ns we spent in each state, if profiling is enabled.      state_profile: BTreeMap<states::State, u64>, @@ -173,8 +172,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {      pub fn new(sink: Sink, mut opts: TokenizerOpts) -> Tokenizer<Sink> {          let start_tag_name = opts              .last_start_tag_name -            .take() -            .map(|s| StrTendril::from(s)); +            .take();          let state = opts.initial_state.unwrap_or(states::Data);          let discard_bom = opts.discard_bom;          Tokenizer { @@ -188,15 +186,15 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              ignore_lf: false,              discard_bom,              current_tag_kind: StartTag, -            current_tag_name: StrTendril::new(), +            current_tag_name: String::new(),              current_tag_self_closing: false,              current_tag_attrs: vec![], -            current_attr_name: StrTendril::new(), -            current_attr_value: StrTendril::new(), -            current_comment: StrTendril::new(), +            current_attr_name: String::new(), +            current_attr_value: String::new(), +            current_comment: String::new(),              current_doctype: Doctype::new(),              last_start_tag_name: start_tag_name, -            temp_buf: StrTendril::new(), +            temp_buf: String::new(),              state_profile: BTreeMap::new(),              time_in_sink: 0,              current_line: 1, @@ -324,12 +322,12 @@ impl<Sink: TokenSink> Tokenizer<Sink> {          pat: &str,          eq: fn(&u8, &u8) -> bool,      ) -> Option<bool> { -        input.push_front(replace(&mut self.temp_buf, StrTendril::new())); +        input.push_front(replace(&mut self.temp_buf, String::new()));          match input.eat(pat, eq) {              None if self.at_eof => Some(false),              None => {                  while let Some(c) = input.next() { -                    self.temp_buf.push_char(c); +                    self.temp_buf.push(c);                  }                  None              }, @@ -398,12 +396,12 @@ impl<Sink: TokenSink> Tokenizer<Sink> {      fn emit_char(&mut self, c: char) {          self.process_token_and_continue(match c {              '\0' => NullCharacterToken, -            _ => CharacterTokens(StrTendril::from_char(c)), +            _ => CharacterTokens(String::from(c)),          });      }      // The string must not contain '\0'! -    fn emit_chars(&mut self, b: StrTendril) { +    fn emit_chars(&mut self, b: String) {          self.process_token_and_continue(CharacterTokens(b));      } @@ -453,7 +451,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {      fn emit_temp_buf(&mut self) {          // FIXME: Make sure that clearing on emit is spec-compatible. -        let buf = replace(&mut self.temp_buf, StrTendril::new()); +        let buf = replace(&mut self.temp_buf, String::new());          self.emit_chars(buf);      } @@ -463,7 +461,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {      }      fn emit_current_comment(&mut self) { -        let comment = replace(&mut self.current_comment, StrTendril::new()); +        let comment = replace(&mut self.current_comment, String::new());          self.process_token_and_continue(CommentToken(comment));      } @@ -475,7 +473,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {      fn create_tag(&mut self, kind: TagKind, c: char) {          self.discard_tag(); -        self.current_tag_name.push_char(c); +        self.current_tag_name.push(c);          self.current_tag_kind = kind;      } @@ -489,7 +487,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {      fn create_attribute(&mut self, c: char) {          self.finish_attribute(); -        self.current_attr_name.push_char(c); +        self.current_attr_name.push(c);      }      fn finish_attribute(&mut self) { @@ -516,7 +514,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {              self.current_attr_name.clear();              self.current_tag_attrs.push(Attribute {                  name: name, -                value: replace(&mut self.current_attr_value, StrTendril::new()), +                value: replace(&mut self.current_attr_value, String::new()),              });          }      } @@ -526,7 +524,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {          self.process_token_and_continue(DoctypeToken(doctype));      } -    fn doctype_id(&mut self, kind: DoctypeIdKind) -> &mut Option<StrTendril> { +    fn doctype_id(&mut self, kind: DoctypeIdKind) -> &mut Option<String> {          match kind {              Public => &mut self.current_doctype.public_id,              System => &mut self.current_doctype.system_id, @@ -537,7 +535,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {          let id = self.doctype_id(kind);          match *id {              Some(ref mut s) => s.clear(), -            None => *id = Some(StrTendril::new()), +            None => *id = Some(String::new()),          }      } @@ -573,18 +571,18 @@ impl<Sink: TokenSink> Tokenizer<Sink> {  macro_rules! shorthand (      ( $me:ident : emit $c:expr                     ) => ( $me.emit_char($c);                                   );      ( $me:ident : create_tag $kind:ident $c:expr   ) => ( $me.create_tag($kind, $c);                           ); -    ( $me:ident : push_tag $c:expr                 ) => ( $me.current_tag_name.push_char($c);                  ); +    ( $me:ident : push_tag $c:expr                 ) => ( $me.current_tag_name.push($c);                       );      ( $me:ident : discard_tag                      ) => ( $me.discard_tag();                                   );      ( $me:ident : discard_char $input:expr         ) => ( $me.discard_char($input);                            ); -    ( $me:ident : push_temp $c:expr                ) => ( $me.temp_buf.push_char($c);                          ); +    ( $me:ident : push_temp $c:expr                ) => ( $me.temp_buf.push($c);                               );      ( $me:ident : emit_temp                        ) => ( $me.emit_temp_buf();                                 );      ( $me:ident : clear_temp                       ) => ( $me.clear_temp_buf();                                );      ( $me:ident : create_attr $c:expr              ) => ( $me.create_attribute($c);                            ); -    ( $me:ident : push_name $c:expr                ) => ( $me.current_attr_name.push_char($c);                 ); -    ( $me:ident : push_value $c:expr               ) => ( $me.current_attr_value.push_char($c);                ); -    ( $me:ident : append_value $c:expr             ) => ( $me.current_attr_value.push_tendril($c);             ); -    ( $me:ident : push_comment $c:expr             ) => ( $me.current_comment.push_char($c);                   ); -    ( $me:ident : append_comment $c:expr           ) => ( $me.current_comment.push_slice($c);                  ); +    ( $me:ident : push_name $c:expr                ) => ( $me.current_attr_name.push($c);                      ); +    ( $me:ident : push_value $c:expr               ) => ( $me.current_attr_value.push($c);                     ); +    ( $me:ident : append_value $c:expr             ) => ( $me.current_attr_value.push_str($c);                 ); +    ( $me:ident : push_comment $c:expr             ) => ( $me.current_comment.push($c);                        ); +    ( $me:ident : append_comment $c:expr           ) => ( $me.current_comment.push_str($c);                    );      ( $me:ident : emit_comment                     ) => ( $me.emit_current_comment();                          );      ( $me:ident : clear_comment                    ) => ( $me.current_comment.clear();                         );      ( $me:ident : create_doctype                   ) => ( $me.current_doctype = Doctype::new();                ); @@ -1523,7 +1521,6 @@ impl<Sink: TokenSink> Tokenizer<Sink> {  #[allow(non_snake_case)]  mod test {      use super::option_push; // private items -    use tendril::{SliceExt, StrTendril};      use super::{TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts}; @@ -1539,7 +1536,7 @@ mod test {      // vector is a collection of the line numbers that each token is on.      struct LinesMatch {          tokens: Vec<Token>, -        current_str: StrTendril, +        current_str: String,          lines: Vec<(Token, u64)>,      } @@ -1547,7 +1544,7 @@ mod test {          fn new() -> LinesMatch {              LinesMatch {                  tokens: vec![], -                current_str: StrTendril::new(), +                current_str: String::new(),                  lines: vec![],              }          } @@ -1559,7 +1556,7 @@ mod test {          fn finish_str(&mut self) {              if self.current_str.len() > 0 { -                let s = replace(&mut self.current_str, StrTendril::new()); +                let s = replace(&mut self.current_str, String::new());                  self.tokens.push(CharacterTokens(s));              }          } @@ -1575,11 +1572,11 @@ mod test {          ) -> TokenSinkResult<Self::Handle> {              match token {                  CharacterTokens(b) => { -                    self.current_str.push_slice(&b); +                    self.current_str.push_str(&b);                  },                  NullCharacterToken => { -                    self.current_str.push_char('\0'); +                    self.current_str.push('\0');                  },                  ParseError(_) => { @@ -1610,7 +1607,7 @@ mod test {      // Take in tokens, process them, and return vector with line      // numbers that each token is on -    fn tokenize(input: Vec<StrTendril>, opts: TokenizerOpts) -> Vec<(Token, u64)> { +    fn tokenize(input: Vec<String>, opts: TokenizerOpts) -> Vec<(Token, u64)> {          let sink = LinesMatch::new();          let mut tok = Tokenizer::new(sink, opts);          let mut buffer = BufferQueue::new(); @@ -1623,7 +1620,7 @@ mod test {      }      // Create a tag token -    fn create_tag(token: StrTendril, tagkind: TagKind) -> Token { +    fn create_tag(token: String, tagkind: TagKind) -> Token {          let name = token;          let token = TagToken(Tag {              kind: tagkind, @@ -1636,23 +1633,23 @@ mod test {      #[test]      fn push_to_None_gives_singleton() { -        let mut s: Option<StrTendril> = None; +        let mut s: Option<String> = None;          option_push(&mut s, 'x'); -        assert_eq!(s, Some("x".to_tendril())); +        assert_eq!(s, Some("x".into()));      }      #[test]      fn push_to_empty_appends() { -        let mut s: Option<StrTendril> = Some(StrTendril::new()); +        let mut s: Option<String> = Some(String::new());          option_push(&mut s, 'x'); -        assert_eq!(s, Some("x".to_tendril())); +        assert_eq!(s, Some("x".into()));      }      #[test]      fn push_to_nonempty_appends() { -        let mut s: Option<StrTendril> = Some(StrTendril::from_slice("y")); +        let mut s: Option<String> = Some(String::from("y"));          option_push(&mut s, 'x'); -        assert_eq!(s, Some("yx".to_tendril())); +        assert_eq!(s, Some("yx".into()));      }      #[test] @@ -1665,16 +1662,16 @@ mod test {              last_start_tag_name: None,          };          let vector = vec![ -            StrTendril::from("<a>\n"), -            StrTendril::from("<b>\n"), -            StrTendril::from("</b>\n"), -            StrTendril::from("</a>\n"), +            String::from("<a>\n"), +            String::from("<b>\n"), +            String::from("</b>\n"), +            String::from("</a>\n"),          ];          let expected = vec![ -            (create_tag(StrTendril::from("a"), StartTag), 1), -            (create_tag(StrTendril::from("b"), StartTag), 2), -            (create_tag(StrTendril::from("b"), EndTag), 3), -            (create_tag(StrTendril::from("a"), EndTag), 4), +            (create_tag(String::from("a"), StartTag), 1), +            (create_tag(String::from("b"), StartTag), 2), +            (create_tag(String::from("b"), EndTag), 3), +            (create_tag(String::from("a"), EndTag), 4),          ];          let results = tokenize(vector, opts);          assert_eq!(results, expected); @@ -1690,16 +1687,16 @@ mod test {              last_start_tag_name: None,          };          let vector = vec![ -            StrTendril::from("<a>\r\n"), -            StrTendril::from("<b>\r\n"), -            StrTendril::from("</b>\r\n"), -            StrTendril::from("</a>\r\n"), +            String::from("<a>\r\n"), +            String::from("<b>\r\n"), +            String::from("</b>\r\n"), +            String::from("</a>\r\n"),          ];          let expected = vec![ -            (create_tag(StrTendril::from("a"), StartTag), 1), -            (create_tag(StrTendril::from("b"), StartTag), 2), -            (create_tag(StrTendril::from("b"), EndTag), 3), -            (create_tag(StrTendril::from("a"), EndTag), 4), +            (create_tag(String::from("a"), StartTag), 1), +            (create_tag(String::from("b"), StartTag), 2), +            (create_tag(String::from("b"), EndTag), 3), +            (create_tag(String::from("a"), EndTag), 4),          ];          let results = tokenize(vector, opts);          assert_eq!(results, expected); | 
