about summary refs log tree commit diff
path: root/src/tokenizer
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2021-04-08 12:42:04 +0200
committer Martin Fischer <martin@push-f.com> 2021-04-08 15:40:48 +0200
commit e0bef0105e0cc64bb610889b6921fd94897431d9 (patch)
tree 4601b8a86778c10b65d232f99c1f5fd4b289c22a /src/tokenizer
parent 8bb20dcdeec57b2109b05351663ec1dba9c65f84 (diff)
drop tendril dependency
Diffstat (limited to 'src/tokenizer')
-rw-r--r-- src/tokenizer/char_ref/mod.rs 21
-rw-r--r-- src/tokenizer/interface.rs 17
-rw-r--r-- src/tokenizer/mod.rs 127
3 files changed, 80 insertions, 85 deletions
diff --git a/src/tokenizer/char_ref/mod.rs b/src/tokenizer/char_ref/mod.rs
index 484a9e1..6daeb13 100644
--- a/src/tokenizer/char_ref/mod.rs
+++ b/src/tokenizer/char_ref/mod.rs
@@ -8,7 +8,6 @@
// except according to those terms.
use super::{TokenSink, Tokenizer};
-use tendril::StrTendril;
use crate::util::buffer_queue::BufferQueue;
use crate::util::str::is_ascii_alnum;
@@ -55,7 +54,7 @@ pub struct CharRefTokenizer {
seen_digit: bool,
hex_marker: Option<char>,
- name_buf_opt: Option<StrTendril>,
+ name_buf_opt: Option<String>,
name_match: Option<(u32, u32)>,
name_len: usize,
}
@@ -84,13 +83,13 @@ impl CharRefTokenizer {
self.result.expect("get_result called before done")
}
- fn name_buf(&self) -> &StrTendril {
+ fn name_buf(&self) -> &str {
self.name_buf_opt
.as_ref()
.expect("name_buf missing in named character reference")
}
- fn name_buf_mut(&mut self) -> &mut StrTendril {
+ fn name_buf_mut(&mut self) -> &mut String {
self.name_buf_opt
.as_mut()
.expect("name_buf missing in named character reference")
@@ -189,7 +188,7 @@ impl CharRefTokenizer {
_ => {
self.state = Named;
- self.name_buf_opt = Some(StrTendril::new());
+ self.name_buf_opt = Some(String::new());
Progress
},
}
@@ -265,9 +264,9 @@ impl CharRefTokenizer {
tokenizer: &mut Tokenizer<Sink>,
input: &mut BufferQueue,
) -> Status {
- let mut unconsume = StrTendril::from_char('#');
+ let mut unconsume = String::from('#');
match self.hex_marker {
- Some(c) => unconsume.push_char(c),
+ Some(c) => unconsume.push(c),
None => (),
}
@@ -316,7 +315,7 @@ impl CharRefTokenizer {
input: &mut BufferQueue,
) -> Status {
let c = unwrap_or_return!(tokenizer.get_char(input), Stuck);
- self.name_buf_mut().push_char(c);
+ self.name_buf_mut().push(c);
self.finish_named(tokenizer, input, Some(c))
}
@@ -411,7 +410,7 @@ impl CharRefTokenizer {
self.unconsume_name(input);
self.finish_none()
} else {
- input.push_front(StrTendril::from_slice(&self.name_buf()[name_len..]));
+ input.push_front(String::from(&self.name_buf()[name_len..]));
self.result = Some(CharRef {
chars: [from_u32(c1).unwrap(), from_u32(c2).unwrap()],
num_chars: if c2 == 0 { 1 } else { 2 },
@@ -428,7 +427,7 @@ impl CharRefTokenizer {
input: &mut BufferQueue,
) -> Status {
let c = unwrap_or_return!(tokenizer.get_char(input), Stuck);
- self.name_buf_mut().push_char(c);
+ self.name_buf_mut().push(c);
match c {
_ if is_ascii_alnum(c) => return Progress,
';' => self.emit_name_error(tokenizer),
@@ -462,7 +461,7 @@ impl CharRefTokenizer {
},
Octothorpe => {
- input.push_front(StrTendril::from_slice("#"));
+ input.push_front(String::from("#"));
tokenizer.emit_error(Borrowed("EOF after '#' in character reference"));
self.finish_none();
},
diff --git a/src/tokenizer/interface.rs b/src/tokenizer/interface.rs
index c331a0e..dfd9a9f 100644
--- a/src/tokenizer/interface.rs
+++ b/src/tokenizer/interface.rs
@@ -7,7 +7,6 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-use tendril::StrTendril;
use crate::tokenizer::states;
use std::borrow::Cow;
@@ -19,9 +18,9 @@ pub use self::Token::{EOFToken, NullCharacterToken, ParseError};
// FIXME: already exists in Servo DOM
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct Doctype {
- pub name: Option<StrTendril>,
- pub public_id: Option<StrTendril>,
- pub system_id: Option<StrTendril>,
+ pub name: Option<String>,
+ pub public_id: Option<String>,
+ pub system_id: Option<String>,
pub force_quirks: bool,
}
@@ -51,16 +50,16 @@ pub enum TagKind {
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug)]
pub struct Attribute {
/// The name of the attribute (e.g. the `class` in `<div class="test">`)
- pub name: StrTendril,
+ pub name: String,
/// The value of the attribute (e.g. the `"test"` in `<div class="test">`)
- pub value: StrTendril,
+ pub value: String,
}
/// A tag token.
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct Tag {
pub kind: TagKind,
- pub name: StrTendril,
+ pub name: String,
pub self_closing: bool,
pub attrs: Vec<Attribute>,
}
@@ -86,8 +85,8 @@ impl Tag {
pub enum Token {
DoctypeToken(Doctype),
TagToken(Tag),
- CommentToken(StrTendril),
- CharacterTokens(StrTendril),
+ CommentToken(String),
+ CharacterTokens(String),
NullCharacterToken,
EOFToken,
ParseError(Cow<'static, str>),
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index f45c917..eb22b11 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -31,7 +31,6 @@ use std::default::Default;
use std::mem::replace;
pub use crate::util::buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult};
-use tendril::StrTendril;
mod char_ref;
mod interface;
@@ -49,10 +48,10 @@ pub enum TokenizerResult<Handle> {
Script(Handle),
}
-fn option_push(opt_str: &mut Option<StrTendril>, c: char) {
+fn option_push(opt_str: &mut Option<String>, c: char) {
match *opt_str {
- Some(ref mut s) => s.push_char(c),
- None => *opt_str = Some(StrTendril::from_char(c)),
+ Some(ref mut s) => s.push(c),
+ None => *opt_str = Some(String::from(c)),
}
}
@@ -132,7 +131,7 @@ pub struct Tokenizer<Sink> {
current_tag_kind: TagKind,
/// Current tag name.
- current_tag_name: StrTendril,
+ current_tag_name: String,
/// Current tag is self-closing?
current_tag_self_closing: bool,
@@ -141,22 +140,22 @@ pub struct Tokenizer<Sink> {
current_tag_attrs: Vec<Attribute>,
/// Current attribute name.
- current_attr_name: StrTendril,
+ current_attr_name: String,
/// Current attribute value.
- current_attr_value: StrTendril,
+ current_attr_value: String,
/// Current comment.
- current_comment: StrTendril,
+ current_comment: String,
/// Current doctype token.
current_doctype: Doctype,
/// Last start tag name, for use in checking "appropriate end tag".
- last_start_tag_name: Option<StrTendril>,
+ last_start_tag_name: Option<String>,
/// The "temporary buffer" mentioned in the spec.
- temp_buf: StrTendril,
+ temp_buf: String,
/// Record of how many ns we spent in each state, if profiling is enabled.
state_profile: BTreeMap<states::State, u64>,
@@ -173,8 +172,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
pub fn new(sink: Sink, mut opts: TokenizerOpts) -> Tokenizer<Sink> {
let start_tag_name = opts
.last_start_tag_name
- .take()
- .map(|s| StrTendril::from(s));
+ .take();
let state = opts.initial_state.unwrap_or(states::Data);
let discard_bom = opts.discard_bom;
Tokenizer {
@@ -188,15 +186,15 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
ignore_lf: false,
discard_bom,
current_tag_kind: StartTag,
- current_tag_name: StrTendril::new(),
+ current_tag_name: String::new(),
current_tag_self_closing: false,
current_tag_attrs: vec![],
- current_attr_name: StrTendril::new(),
- current_attr_value: StrTendril::new(),
- current_comment: StrTendril::new(),
+ current_attr_name: String::new(),
+ current_attr_value: String::new(),
+ current_comment: String::new(),
current_doctype: Doctype::new(),
last_start_tag_name: start_tag_name,
- temp_buf: StrTendril::new(),
+ temp_buf: String::new(),
state_profile: BTreeMap::new(),
time_in_sink: 0,
current_line: 1,
@@ -324,12 +322,12 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
pat: &str,
eq: fn(&u8, &u8) -> bool,
) -> Option<bool> {
- input.push_front(replace(&mut self.temp_buf, StrTendril::new()));
+ input.push_front(replace(&mut self.temp_buf, String::new()));
match input.eat(pat, eq) {
None if self.at_eof => Some(false),
None => {
while let Some(c) = input.next() {
- self.temp_buf.push_char(c);
+ self.temp_buf.push(c);
}
None
},
@@ -398,12 +396,12 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
fn emit_char(&mut self, c: char) {
self.process_token_and_continue(match c {
'\0' => NullCharacterToken,
- _ => CharacterTokens(StrTendril::from_char(c)),
+ _ => CharacterTokens(String::from(c)),
});
}
// The string must not contain '\0'!
- fn emit_chars(&mut self, b: StrTendril) {
+ fn emit_chars(&mut self, b: String) {
self.process_token_and_continue(CharacterTokens(b));
}
@@ -453,7 +451,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
fn emit_temp_buf(&mut self) {
// FIXME: Make sure that clearing on emit is spec-compatible.
- let buf = replace(&mut self.temp_buf, StrTendril::new());
+ let buf = replace(&mut self.temp_buf, String::new());
self.emit_chars(buf);
}
@@ -463,7 +461,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
}
fn emit_current_comment(&mut self) {
- let comment = replace(&mut self.current_comment, StrTendril::new());
+ let comment = replace(&mut self.current_comment, String::new());
self.process_token_and_continue(CommentToken(comment));
}
@@ -475,7 +473,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
fn create_tag(&mut self, kind: TagKind, c: char) {
self.discard_tag();
- self.current_tag_name.push_char(c);
+ self.current_tag_name.push(c);
self.current_tag_kind = kind;
}
@@ -489,7 +487,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
fn create_attribute(&mut self, c: char) {
self.finish_attribute();
- self.current_attr_name.push_char(c);
+ self.current_attr_name.push(c);
}
fn finish_attribute(&mut self) {
@@ -516,7 +514,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
self.current_attr_name.clear();
self.current_tag_attrs.push(Attribute {
name: name,
- value: replace(&mut self.current_attr_value, StrTendril::new()),
+ value: replace(&mut self.current_attr_value, String::new()),
});
}
}
@@ -526,7 +524,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
self.process_token_and_continue(DoctypeToken(doctype));
}
- fn doctype_id(&mut self, kind: DoctypeIdKind) -> &mut Option<StrTendril> {
+ fn doctype_id(&mut self, kind: DoctypeIdKind) -> &mut Option<String> {
match kind {
Public => &mut self.current_doctype.public_id,
System => &mut self.current_doctype.system_id,
@@ -537,7 +535,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
let id = self.doctype_id(kind);
match *id {
Some(ref mut s) => s.clear(),
- None => *id = Some(StrTendril::new()),
+ None => *id = Some(String::new()),
}
}
@@ -573,18 +571,18 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
macro_rules! shorthand (
( $me:ident : emit $c:expr ) => ( $me.emit_char($c); );
( $me:ident : create_tag $kind:ident $c:expr ) => ( $me.create_tag($kind, $c); );
- ( $me:ident : push_tag $c:expr ) => ( $me.current_tag_name.push_char($c); );
+ ( $me:ident : push_tag $c:expr ) => ( $me.current_tag_name.push($c); );
( $me:ident : discard_tag ) => ( $me.discard_tag(); );
( $me:ident : discard_char $input:expr ) => ( $me.discard_char($input); );
- ( $me:ident : push_temp $c:expr ) => ( $me.temp_buf.push_char($c); );
+ ( $me:ident : push_temp $c:expr ) => ( $me.temp_buf.push($c); );
( $me:ident : emit_temp ) => ( $me.emit_temp_buf(); );
( $me:ident : clear_temp ) => ( $me.clear_temp_buf(); );
( $me:ident : create_attr $c:expr ) => ( $me.create_attribute($c); );
- ( $me:ident : push_name $c:expr ) => ( $me.current_attr_name.push_char($c); );
- ( $me:ident : push_value $c:expr ) => ( $me.current_attr_value.push_char($c); );
- ( $me:ident : append_value $c:expr ) => ( $me.current_attr_value.push_tendril($c); );
- ( $me:ident : push_comment $c:expr ) => ( $me.current_comment.push_char($c); );
- ( $me:ident : append_comment $c:expr ) => ( $me.current_comment.push_slice($c); );
+ ( $me:ident : push_name $c:expr ) => ( $me.current_attr_name.push($c); );
+ ( $me:ident : push_value $c:expr ) => ( $me.current_attr_value.push($c); );
+ ( $me:ident : append_value $c:expr ) => ( $me.current_attr_value.push_str($c); );
+ ( $me:ident : push_comment $c:expr ) => ( $me.current_comment.push($c); );
+ ( $me:ident : append_comment $c:expr ) => ( $me.current_comment.push_str($c); );
( $me:ident : emit_comment ) => ( $me.emit_current_comment(); );
( $me:ident : clear_comment ) => ( $me.current_comment.clear(); );
( $me:ident : create_doctype ) => ( $me.current_doctype = Doctype::new(); );
@@ -1523,7 +1521,6 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
#[allow(non_snake_case)]
mod test {
use super::option_push; // private items
- use tendril::{SliceExt, StrTendril};
use super::{TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts};
@@ -1539,7 +1536,7 @@ mod test {
// vector is a collection of the line numbers that each token is on.
struct LinesMatch {
tokens: Vec<Token>,
- current_str: StrTendril,
+ current_str: String,
lines: Vec<(Token, u64)>,
}
@@ -1547,7 +1544,7 @@ mod test {
fn new() -> LinesMatch {
LinesMatch {
tokens: vec![],
- current_str: StrTendril::new(),
+ current_str: String::new(),
lines: vec![],
}
}
@@ -1559,7 +1556,7 @@ mod test {
fn finish_str(&mut self) {
if self.current_str.len() > 0 {
- let s = replace(&mut self.current_str, StrTendril::new());
+ let s = replace(&mut self.current_str, String::new());
self.tokens.push(CharacterTokens(s));
}
}
@@ -1575,11 +1572,11 @@ mod test {
) -> TokenSinkResult<Self::Handle> {
match token {
CharacterTokens(b) => {
- self.current_str.push_slice(&b);
+ self.current_str.push_str(&b);
},
NullCharacterToken => {
- self.current_str.push_char('\0');
+ self.current_str.push('\0');
},
ParseError(_) => {
@@ -1610,7 +1607,7 @@ mod test {
// Take in tokens, process them, and return vector with line
// numbers that each token is on
- fn tokenize(input: Vec<StrTendril>, opts: TokenizerOpts) -> Vec<(Token, u64)> {
+ fn tokenize(input: Vec<String>, opts: TokenizerOpts) -> Vec<(Token, u64)> {
let sink = LinesMatch::new();
let mut tok = Tokenizer::new(sink, opts);
let mut buffer = BufferQueue::new();
@@ -1623,7 +1620,7 @@ mod test {
}
// Create a tag token
- fn create_tag(token: StrTendril, tagkind: TagKind) -> Token {
+ fn create_tag(token: String, tagkind: TagKind) -> Token {
let name = token;
let token = TagToken(Tag {
kind: tagkind,
@@ -1636,23 +1633,23 @@ mod test {
#[test]
fn push_to_None_gives_singleton() {
- let mut s: Option<StrTendril> = None;
+ let mut s: Option<String> = None;
option_push(&mut s, 'x');
- assert_eq!(s, Some("x".to_tendril()));
+ assert_eq!(s, Some("x".into()));
}
#[test]
fn push_to_empty_appends() {
- let mut s: Option<StrTendril> = Some(StrTendril::new());
+ let mut s: Option<String> = Some(String::new());
option_push(&mut s, 'x');
- assert_eq!(s, Some("x".to_tendril()));
+ assert_eq!(s, Some("x".into()));
}
#[test]
fn push_to_nonempty_appends() {
- let mut s: Option<StrTendril> = Some(StrTendril::from_slice("y"));
+ let mut s: Option<String> = Some(String::from("y"));
option_push(&mut s, 'x');
- assert_eq!(s, Some("yx".to_tendril()));
+ assert_eq!(s, Some("yx".into()));
}
#[test]
@@ -1665,16 +1662,16 @@ mod test {
last_start_tag_name: None,
};
let vector = vec![
- StrTendril::from("<a>\n"),
- StrTendril::from("<b>\n"),
- StrTendril::from("</b>\n"),
- StrTendril::from("</a>\n"),
+ String::from("<a>\n"),
+ String::from("<b>\n"),
+ String::from("</b>\n"),
+ String::from("</a>\n"),
];
let expected = vec![
- (create_tag(StrTendril::from("a"), StartTag), 1),
- (create_tag(StrTendril::from("b"), StartTag), 2),
- (create_tag(StrTendril::from("b"), EndTag), 3),
- (create_tag(StrTendril::from("a"), EndTag), 4),
+ (create_tag(String::from("a"), StartTag), 1),
+ (create_tag(String::from("b"), StartTag), 2),
+ (create_tag(String::from("b"), EndTag), 3),
+ (create_tag(String::from("a"), EndTag), 4),
];
let results = tokenize(vector, opts);
assert_eq!(results, expected);
@@ -1690,16 +1687,16 @@ mod test {
last_start_tag_name: None,
};
let vector = vec![
- StrTendril::from("<a>\r\n"),
- StrTendril::from("<b>\r\n"),
- StrTendril::from("</b>\r\n"),
- StrTendril::from("</a>\r\n"),
+ String::from("<a>\r\n"),
+ String::from("<b>\r\n"),
+ String::from("</b>\r\n"),
+ String::from("</a>\r\n"),
];
let expected = vec![
- (create_tag(StrTendril::from("a"), StartTag), 1),
- (create_tag(StrTendril::from("b"), StartTag), 2),
- (create_tag(StrTendril::from("b"), EndTag), 3),
- (create_tag(StrTendril::from("a"), EndTag), 4),
+ (create_tag(String::from("a"), StartTag), 1),
+ (create_tag(String::from("b"), StartTag), 2),
+ (create_tag(String::from("b"), EndTag), 3),
+ (create_tag(String::from("a"), EndTag), 4),
];
let results = tokenize(vector, opts);
assert_eq!(results, expected);