author     Martin Fischer <martin@push-f.com>   2021-04-08 10:25:09 +0200
committer  Martin Fischer <martin@push-f.com>   2021-04-08 15:40:48 +0200
commit     8bb20dcdeec57b2109b05351663ec1dba9c65f84 (patch)
tree       3ddf7196d2fe66d92e4819c362e248cb96be3c0b
parent     fc9d3132f390b038544e45048b4fe345a3d47793 (diff)
drop markup5ever dependency
-rw-r--r--  Cargo.toml                 |  1
-rw-r--r--  examples/tokenize.rs       |  2
-rw-r--r--  src/lib.rs                 | 12
-rw-r--r--  src/tokenizer/interface.rs |  5
-rw-r--r--  src/tokenizer/mod.rs       | 20
-rw-r--r--  src/util/buffer_queue.rs   | 42
-rw-r--r--  src/util/smallcharset.rs   | 14
7 files changed, 10 insertions(+), 86 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 6e8b156..e32901f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,6 @@ edition = "2018"
[dependencies]
log = "0.4"
mac = "0.1"
-markup5ever = { version = "0.10" }
tendril = "0.4"
[dev-dependencies]
diff --git a/examples/tokenize.rs b/examples/tokenize.rs
index 039ffb7..c422f0c 100644
--- a/examples/tokenize.rs
+++ b/examples/tokenize.rs
@@ -61,7 +61,7 @@ impl TokenSink for TokenPrinter {
for attr in tag.attrs.iter() {
print!(
" \x1b[36m{}\x1b[0m='\x1b[34m{}\x1b[0m'",
- attr.name.local, attr.value
+ attr.name, attr.value
);
}
if tag.self_closing {
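For downstream code, the visible change is that an attribute's name is now a plain `StrTendril` rather than a `QualName`, so it is formatted directly instead of through `.local`. A minimal sketch of iterating a tag's attributes after this patch (plain `println!` in place of the coloured output above):

    for attr in tag.attrs.iter() {
        // `attr.name` and `attr.value` are both StrTendril now,
        // so both implement Display and can be printed directly.
        println!("{}='{}'", attr.name, attr.value);
    }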
diff --git a/src/lib.rs b/src/lib.rs
index 95647ff..88934f0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -12,7 +12,6 @@
#![cfg_attr(test, deny(warnings))]
#![allow(unused_parens)]
-pub use markup5ever;
pub use tendril;
#[macro_use]
@@ -20,17 +19,6 @@ mod macros;
/// Create a [`SmallCharSet`], with each space-separated number stored in the set.
///
-/// # Examples
-///
-/// ```
-/// # #[macro_use] extern crate markup5ever;
-/// # fn main() {
-/// let set = small_char_set!(12 54 42);
-/// assert_eq!(set.bits,
-/// 0b00000000_01000000_00000100_00000000_00000000_00000000_00010000_00000000);
-/// # }
-/// ```
-///
/// [`SmallCharSet`]: struct.SmallCharSet.html
#[macro_export]
macro_rules! small_char_set ( ($($e:expr)+) => (
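The removed doctest needed markup5ever only for the macro itself, which this crate now exports on its own. An equivalent example could look like the following (macro path assumed to be this crate's root; the expected bit pattern is taken from the old doctest):

    let set = small_char_set!(12 54 42);
    assert_eq!(
        set.bits,
        0b00000000_01000000_00000100_00000000_00000000_00000000_00010000_00000000
    );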
diff --git a/src/tokenizer/interface.rs b/src/tokenizer/interface.rs
index ebb8569..c331a0e 100644
--- a/src/tokenizer/interface.rs
+++ b/src/tokenizer/interface.rs
@@ -9,7 +9,6 @@
use tendril::StrTendril;
use crate::tokenizer::states;
-use markup5ever::{LocalName, QualName};
use std::borrow::Cow;
pub use self::TagKind::{EndTag, StartTag};
@@ -52,7 +51,7 @@ pub enum TagKind {
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug)]
pub struct Attribute {
/// The name of the attribute (e.g. the `class` in `<div class="test">`)
- pub name: QualName,
+ pub name: StrTendril,
/// The value of the attribute (e.g. the `"test"` in `<div class="test">`)
pub value: StrTendril,
}
@@ -61,7 +60,7 @@ pub struct Attribute {
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct Tag {
pub kind: TagKind,
- pub name: LocalName,
+ pub name: StrTendril,
pub self_closing: bool,
pub attrs: Vec<Attribute>,
}
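Both the tag name and attribute names are now ordinary tendrils. A rough sketch of constructing the new types (values purely illustrative; `StartTag` is the re-export from this module shown above):

    use tendril::StrTendril;

    let tag = Tag {
        kind: StartTag,
        name: StrTendril::from("div"),        // previously a LocalName
        self_closing: false,
        attrs: vec![Attribute {
            name: StrTendril::from("class"),  // previously a QualName
            value: StrTendril::from("test"),
        }],
    };
    assert_eq!(&*tag.name, "div");            // StrTendril derefs to str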
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index c6f7276..f45c917 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -25,7 +25,6 @@ use crate::util::{smallcharset::SmallCharSet, str::lower_ascii_letter};
use log::debug;
use mac::{_tt_as_expr_hack, format_if, matches};
-use markup5ever::{namespace_url, ns};
use std::borrow::Cow::{self, Borrowed};
use std::collections::BTreeMap;
use std::default::Default;
@@ -33,7 +32,6 @@ use std::mem::replace;
pub use crate::util::buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult};
use tendril::StrTendril;
-use markup5ever::{LocalName, QualName};
mod char_ref;
mod interface;
@@ -155,7 +153,7 @@ pub struct Tokenizer<Sink> {
current_doctype: Doctype,
/// Last start tag name, for use in checking "appropriate end tag".
- last_start_tag_name: Option<LocalName>,
+ last_start_tag_name: Option<StrTendril>,
/// The "temporary buffer" mentioned in the spec.
temp_buf: StrTendril,
@@ -176,7 +174,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
let start_tag_name = opts
.last_start_tag_name
.take()
- .map(|s| LocalName::from(&*s));
+ .map(|s| StrTendril::from(s));
let state = opts.initial_state.unwrap_or(states::Data);
let discard_bom = opts.discard_bom;
Tokenizer {
@@ -412,7 +410,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
fn emit_current_tag(&mut self) -> ProcessResult<Sink::Handle> {
self.finish_attribute();
- let name = LocalName::from(&*self.current_tag_name);
+ let name = self.current_tag_name.clone();
self.current_tag_name.clear();
match self.current_tag_kind {
@@ -506,7 +504,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
let name = &*self.current_attr_name;
self.current_tag_attrs
.iter()
- .any(|a| &*a.name.local == name)
+ .any(|a| &*a.name == name)
};
if dup {
@@ -514,12 +512,10 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
self.current_attr_name.clear();
self.current_attr_value.clear();
} else {
- let name = LocalName::from(&*self.current_attr_name);
+ let name = self.current_attr_name.clone();
self.current_attr_name.clear();
self.current_tag_attrs.push(Attribute {
- // The tree builder will adjust the namespace if necessary.
- // This only happens in foreign elements.
- name: QualName::new(None, ns!(), name),
+ name: name,
value: replace(&mut self.current_attr_value, StrTendril::new()),
});
}
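The duplicate-attribute check now compares the stored tendril against the current name through a plain `str` deref; in isolation the comparison amounts to something like this (values illustrative):

    let existing = StrTendril::from("class");
    let current: &str = "class";
    assert!(&*existing == current); // &*StrTendril yields &str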
@@ -1538,8 +1534,6 @@ mod test {
use crate::util::buffer_queue::BufferQueue;
use std::mem::replace;
- use markup5ever::LocalName;
-
// LinesMatch implements the TokenSink trait. It is used for testing to see
// if current_line is being updated when process_token is called. The lines
// vector is a collection of the line numbers that each token is on.
@@ -1630,7 +1624,7 @@ mod test {
// Create a tag token
fn create_tag(token: StrTendril, tagkind: TagKind) -> Token {
- let name = LocalName::from(&*token);
+ let name = token;
let token = TagToken(Tag {
kind: tagkind,
name,
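In the test helper, the tendril passed in is now used as the tag name as-is. A sketch of the full helper after this change (the remaining `Tag` fields are assumed from `src/tokenizer/interface.rs`; the original body may differ):

    fn create_tag(token: StrTendril, tagkind: TagKind) -> Token {
        TagToken(Tag {
            kind: tagkind,
            name: token,          // previously LocalName::from(&*token)
            self_closing: false,
            attrs: vec![],
        })
    }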
diff --git a/src/util/buffer_queue.rs b/src/util/buffer_queue.rs
index d572489..7f8c3cc 100644
--- a/src/util/buffer_queue.rs
+++ b/src/util/buffer_queue.rs
@@ -123,31 +123,6 @@ impl BufferQueue {
/// Pops and returns either a single character from the given set, or
/// a buffer of characters none of which are in the set.
- ///
- /// # Examples
- ///
- /// ```
- /// # #[macro_use] extern crate markup5ever;
- /// # #[macro_use] extern crate tendril;
- /// # fn main() {
- /// use markup5ever::buffer_queue::{BufferQueue, SetResult};
- ///
- /// let mut queue = BufferQueue::new();
- /// queue.push_back(format_tendril!(r#"<some_tag attr="text">SomeText</some_tag>"#));
- /// let set = small_char_set!(b'<' b'>' b' ' b'=' b'"' b'/');
- /// let tag = format_tendril!("some_tag");
- /// let attr = format_tendril!("attr");
- /// let attr_val = format_tendril!("text");
- /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet('<')));
- /// assert_eq!(queue.pop_except_from(set), Some(SetResult::NotFromSet(tag)));
- /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet(' ')));
- /// assert_eq!(queue.pop_except_from(set), Some(SetResult::NotFromSet(attr)));
- /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet('=')));
- /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet('"')));
- /// assert_eq!(queue.pop_except_from(set), Some(SetResult::NotFromSet(attr_val)));
- /// // ...
- /// # }
- /// ```
pub fn pop_except_from(&mut self, set: SmallCharSet) -> Option<SetResult> {
let (result, now_empty) = match self.buffers.front_mut() {
None => (None, false),
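The removed `pop_except_from` doctest depended on markup5ever only for `small_char_set!` and the re-exported queue types. An equivalent example against this crate might look like this (module paths assumed; expected results carried over from the old doctest):

    use tendril::StrTendril;

    let mut queue = BufferQueue::new();
    queue.push_back(StrTendril::from(r#"<some_tag attr="text">SomeText</some_tag>"#));
    let set = small_char_set!(b'<' b'>' b' ' b'=' b'"' b'/');
    assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet('<')));
    assert_eq!(queue.pop_except_from(set), Some(SetResult::NotFromSet(StrTendril::from("some_tag"))));
    assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet(' ')));
    assert_eq!(queue.pop_except_from(set), Some(SetResult::NotFromSet(StrTendril::from("attr"))));
    assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet('=')));
    assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet('"')));
    assert_eq!(queue.pop_except_from(set), Some(SetResult::NotFromSet(StrTendril::from("text"))));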
@@ -181,23 +156,6 @@ impl BufferQueue {
/// it wasn't possible to know (more data is needed).
///
/// The custom comparison function is used elsewhere to compare ascii-case-insensitively.
- ///
- /// # Examples
- ///
- /// ```
- /// # extern crate markup5ever;
- /// # #[macro_use] extern crate tendril;
- /// # fn main() {
- /// use markup5ever::buffer_queue::{BufferQueue};
- ///
- /// let mut queue = BufferQueue::new();
- /// queue.push_back(format_tendril!("testtext"));
- /// let test_str = "test";
- /// assert_eq!(queue.eat("test", |&a, &b| a == b), Some(true));
- /// assert_eq!(queue.eat("text", |&a, &b| a == b), Some(true));
- /// assert!(queue.is_empty());
- /// # }
- /// ```
pub fn eat<F: Fn(&u8, &u8) -> bool>(&mut self, pat: &str, eq: F) -> Option<bool> {
let mut buffers_exhausted = 0;
let mut consumed_from_last = 0;
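Likewise, the removed `eat` doctest used markup5ever only as an unused extern; a markup5ever-free equivalent (paths assumed, behaviour taken from the old doctest):

    use tendril::StrTendril;

    let mut queue = BufferQueue::new();
    queue.push_back(StrTendril::from("testtext"));
    assert_eq!(queue.eat("test", |&a, &b| a == b), Some(true));
    assert_eq!(queue.eat("text", |&a, &b| a == b), Some(true));
    assert!(queue.is_empty());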
diff --git a/src/util/smallcharset.rs b/src/util/smallcharset.rs
index 957dad7..aeeb189 100644
--- a/src/util/smallcharset.rs
+++ b/src/util/smallcharset.rs
@@ -41,20 +41,6 @@ impl SmallCharSet {
/// Count the number of bytes of characters at the beginning of `buf` which are not in the set.
///
/// This functionality is used in [`BufferQueue::pop_except_from`].
- ///
- /// # Examples
- ///
- /// ```
- /// # #[macro_use] extern crate markup5ever;
- /// # fn main() {
- /// let set = small_char_set!(48 49 50); // '0' '1' '2'
- /// // `test` is 4 chars, 😁 is 4 chars, then we meet a character in the set
- /// let test_str = "test😁01232afd";
- /// assert_eq!(set.nonmember_prefix_len(test_str), 8);
- /// # }
- /// ```
- ///
- /// [`BufferQueue::pop_except_from`]: buffer_queue/struct.BufferQueue.html#method.pop_except_from
pub fn nonmember_prefix_len(&self, buf: &str) -> u32 {
let mut n = 0;
for b in buf.bytes() {
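The removed `nonmember_prefix_len` example can likewise be reconstructed with this crate's own `small_char_set!` (macro path assumed; expected value taken from the old doctest):

    let set = small_char_set!(48 49 50); // '0' '1' '2'
    // "test" is 4 bytes and the emoji is 4 bytes, then '0' (which is in the set) is reached.
    assert_eq!(set.nonmember_prefix_len("test😁01232afd"), 8);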