1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use tendril::StrTendril;
use crate::tokenizer::states;
use std::borrow::Cow;
pub use self::TagKind::{EndTag, StartTag};
pub use self::Token::{CharacterTokens, CommentToken, DoctypeToken, TagToken};
pub use self::Token::{EOFToken, NullCharacterToken, ParseError};
/// A `DOCTYPE` token.
///
/// Each textual part is optional, so the token can be represented even when
/// some (or all) of its parts are not set.
// FIXME: already exists in Servo DOM
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct Doctype {
    /// The doctype name, if one is set.
    pub name: Option<StrTendril>,
    /// The public identifier, if one is set.
    pub public_id: Option<StrTendril>,
    /// The system identifier, if one is set.
    pub system_id: Option<StrTendril>,
    /// The force-quirks flag carried by this token.
    pub force_quirks: bool,
}
impl Doctype {
    /// Creates an empty `DOCTYPE` token: no name, no public identifier, no
    /// system identifier, and `force_quirks` set to `false`.
    pub fn new() -> Doctype {
        Doctype {
            name: None,
            public_id: None,
            system_id: None,
            force_quirks: false,
        }
    }
}

/// `Doctype::default()` yields the same empty token as `Doctype::new()`, so
/// the type can be used with `..Default::default()`, `unwrap_or_default()`
/// and similar generic helpers.
impl Default for Doctype {
    fn default() -> Self {
        Self::new()
    }
}
/// Distinguishes the two kinds of tag token.
#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
pub enum TagKind {
    /// A start tag.
    StartTag,
    /// An end tag.
    EndTag,
}
/// A tag attribute, e.g. `class="test"` in `<div class="test" ...>`.
///
/// Both parts are stored as plain `StrTendril`s; no namespace is kept
/// alongside the name.
///
/// The derived ordering compares `name` first and `value` second, which is
/// what sorting a list of attributes relies on.
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug)]
pub struct Attribute {
    /// The name of the attribute (e.g. the `class` in `<div class="test">`)
    pub name: StrTendril,
    /// The value of the attribute (e.g. the `"test"` in `<div class="test">`)
    pub value: StrTendril,
}
/// A tag token.
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct Tag {
    /// Whether this is a start tag or an end tag.
    pub kind: TagKind,
    /// The tag name.
    pub name: StrTendril,
    /// The self-closing flag of the tag.
    pub self_closing: bool,
    /// The attributes carried by the tag, as a list.
    pub attrs: Vec<Attribute>,
}
impl Tag {
    /// Are the tags equivalent when we don't care about attribute order?
    /// Also ignores the self-closing flag.
    ///
    /// Two tags are equivalent when they have the same kind, the same name,
    /// and the same attributes (duplicates included), regardless of the
    /// order in which the attributes are listed.
    pub fn equiv_modulo_attr_order(&self, other: &Tag) -> bool {
        // Cheap checks first: a differing kind, name or attribute count
        // settles the answer without building any temporary vectors.
        if self.kind != other.kind
            || self.name != other.name
            || self.attrs.len() != other.attrs.len()
        {
            return false;
        }

        // Sort references rather than clones so no attribute is copied.
        // An unstable sort is fine: attributes that compare equal are
        // identical, so their relative order cannot affect the comparison.
        let mut lhs: Vec<_> = self.attrs.iter().collect();
        let mut rhs: Vec<_> = other.attrs.iter().collect();
        lhs.sort_unstable();
        rhs.sort_unstable();
        lhs == rhs
    }
}
/// A token handed to a `TokenSink`.
#[derive(PartialEq, Eq, Debug)]
pub enum Token {
    /// A `DOCTYPE` token.
    DoctypeToken(Doctype),
    /// A start or end tag.
    TagToken(Tag),
    /// A comment, carrying its text.
    CommentToken(StrTendril),
    /// A run of character data.
    CharacterTokens(StrTendril),
    /// A null character — presumably U+0000 found in the input, reported
    /// separately from ordinary character data.
    NullCharacterToken,
    /// The end of the input.
    EOFToken,
    /// A parse error, carrying a message that is either a static string or
    /// an owned one.
    ParseError(Cow<'static, str>),
}
/// The value a `TokenSink` returns after processing a token.
///
/// Marked `#[must_use]`: silently dropping the result is flagged by the
/// compiler.
#[derive(Debug, PartialEq)]
#[must_use]
pub enum TokenSinkResult<Handle> {
    /// No special request — presumably the tokenizer simply carries on.
    Continue,
    /// Carries a sink-defined handle — presumably identifying a script the
    /// caller must deal with before tokenization resumes (TODO confirm
    /// against the tokenizer).
    Script(Handle),
    /// Presumably asks the tokenizer to switch to plaintext handling.
    Plaintext,
    /// Presumably asks the tokenizer to switch to the given kind of raw
    /// data handling.
    RawData(states::RawKind),
}
/// Types which can receive tokens from the tokenizer.
pub trait TokenSink {
    /// Sink-defined handle type, carried by `TokenSinkResult::Script`.
    type Handle;

    /// Process a token.
    ///
    /// `line_number` is presumably the input line the token was produced
    /// on (TODO confirm against the tokenizer). The returned value must not
    /// be ignored, since `TokenSinkResult` is `#[must_use]`.
    fn process_token(&mut self, token: Token, line_number: u64) -> TokenSinkResult<Self::Handle>;

    /// Signal sink that tokenization reached the end.
    ///
    /// The default implementation does nothing.
    fn end(&mut self) {}

    /// Used in the markup declaration open state. By default, this always
    /// returns false and thus all CDATA sections are tokenized as bogus
    /// comments.
    /// <https://html.spec.whatwg.org/multipage/#markup-declaration-open-state>
    fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool {
        false
    }
}
|