aboutsummaryrefslogtreecommitdiff
path: root/src/tokenizer/interface.rs
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2021-04-08 08:42:01 +0200
committer Martin Fischer <martin@push-f.com> 2021-04-08 15:40:37 +0200
commit 57e7eefcbe6fb8c3dc4b01c707be9de4c34963a7 (patch)
tree 6a9d296389bf3023396592c8514ed6712e011c7f /src/tokenizer/interface.rs
import https://github.com/servo/html5ever
commit d1206daa740305f55a5fa159e43eb33afc359cb4
Diffstat (limited to 'src/tokenizer/interface.rs')
-rw-r--r-- src/tokenizer/interface.rs 110
1 files changed, 110 insertions, 0 deletions
diff --git a/src/tokenizer/interface.rs b/src/tokenizer/interface.rs
new file mode 100644
index 0000000..22d11be
--- /dev/null
+++ b/src/tokenizer/interface.rs
@@ -0,0 +1,110 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use crate::interface::Attribute;
+use crate::tendril::StrTendril;
+use crate::tokenizer::states;
+use crate::LocalName;
+use std::borrow::Cow;
+
+pub use self::TagKind::{EndTag, StartTag};
+pub use self::Token::{CharacterTokens, CommentToken, DoctypeToken, TagToken};
+pub use self::Token::{EOFToken, NullCharacterToken, ParseError};
+
+/// A `DOCTYPE` token.
+///
+/// Every textual part is optional. [`Doctype::new`] builds a token with all
+/// of them set to `None` and `force_quirks` set to `false`.
+// FIXME: already exists in Servo DOM
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub struct Doctype {
+ /// The doctype name, or `None` if there is none.
+ pub name: Option<StrTendril>,
+ /// The public identifier, or `None` if there is none.
+ pub public_id: Option<StrTendril>,
+ /// The system identifier, or `None` if there is none.
+ pub system_id: Option<StrTendril>,
+ /// The force-quirks flag.
+ // NOTE(review): presumably mirrors the "force-quirks flag" of the HTML
+ // tokenization spec -- confirm against the tokenizer implementation.
+ pub force_quirks: bool,
+}
+
+impl Doctype {
+    /// Creates an empty `DOCTYPE` token.
+    ///
+    /// The name, public identifier and system identifier are all `None`, and
+    /// `force_quirks` is `false`.
+    pub fn new() -> Doctype {
+        Doctype {
+            name: None,
+            public_id: None,
+            system_id: None,
+            force_quirks: false,
+        }
+    }
+}
+
+/// `Default` yields the same empty token as [`Doctype::new`], so the type can
+/// be used with `..Default::default()`, `mem::take`, `unwrap_or_default`, etc.
+impl Default for Doctype {
+    fn default() -> Doctype {
+        Doctype::new()
+    }
+}
+
+/// The kind of a [`Tag`] token: start tag or end tag.
+///
+/// Both variants are re-exported at module level (`StartTag`, `EndTag`).
+#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
+pub enum TagKind {
+ /// A start tag.
+ StartTag,
+ /// An end tag.
+ EndTag,
+}
+
+/// A tag token.
+///
+/// The derived `PartialEq` compares every field, including the order of
+/// `attrs` and the `self_closing` flag. Use [`Tag::equiv_modulo_attr_order`]
+/// for a comparison that ignores both.
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub struct Tag {
+ /// Whether this is a start tag or an end tag.
+ pub kind: TagKind,
+ /// The tag name.
+ pub name: LocalName,
+ /// The self-closing flag.
+ pub self_closing: bool,
+ /// The tag's attributes.
+ // NOTE(review): presumably kept in source order -- confirm in the tokenizer.
+ pub attrs: Vec<Attribute>,
+}
+
+impl Tag {
+    /// Are the tags equivalent when we don't care about attribute order?
+    /// Also ignores the self-closing flag.
+    ///
+    /// Returns `true` when both tags have the same kind and name and their
+    /// attribute lists are equal after sorting. Repeated attributes are not
+    /// collapsed, so each one must appear the same number of times in both
+    /// tags. Neither tag is modified.
+    pub fn equiv_modulo_attr_order(&self, other: &Tag) -> bool {
+        // Cheap rejections first. Attribute lists of different lengths can
+        // never compare equal, so skip the clone-and-sort work below.
+        if self.kind != other.kind
+            || self.name != other.name
+            || self.attrs.len() != other.attrs.len()
+        {
+            return false;
+        }
+
+        // Sort copies so that the comparison is order-insensitive without
+        // mutating either tag.
+        let mut self_attrs = self.attrs.clone();
+        let mut other_attrs = other.attrs.clone();
+        self_attrs.sort();
+        other_attrs.sort();
+
+        self_attrs == other_attrs
+    }
+}
+
+/// A token passed to [`TokenSink::process_token`].
+///
+/// All variants are re-exported at module level.
+#[derive(PartialEq, Eq, Debug)]
+pub enum Token {
+ /// A `DOCTYPE` token.
+ DoctypeToken(Doctype),
+ /// A start or end tag.
+ TagToken(Tag),
+ /// A comment.
+ // NOTE(review): the payload is presumably the comment text -- confirm.
+ CommentToken(StrTendril),
+ /// Character data.
+ // NOTE(review): presumably a run of one or more characters -- confirm.
+ CharacterTokens(StrTendril),
+ /// A null character.
+ NullCharacterToken,
+ /// End of file.
+ EOFToken,
+ /// A parse error, carrying a message that is either a static string or an
+ /// owned `String`.
+ ParseError(Cow<'static, str>),
+}
+
+/// The value returned by [`TokenSink::process_token`].
+///
+/// Marked `#[must_use]`, so callers get a warning if they discard it.
+// NOTE(review): the per-variant meanings below are inferred from the variant
+// names only; confirm them against the tokenizer that consumes this value.
+#[derive(Debug, PartialEq)]
+#[must_use]
+pub enum TokenSinkResult<Handle> {
+ /// Presumably: carry on tokenizing as before.
+ Continue,
+ /// Presumably: a script was encountered; carries the sink's `Handle` for it.
+ Script(Handle),
+ /// Presumably: switch the tokenizer to plaintext handling.
+ Plaintext,
+ /// Presumably: switch the tokenizer to raw-data handling of the given kind.
+ RawData(states::RawKind),
+}
+
+/// Types which can receive tokens from the tokenizer.
+///
+/// Only [`TokenSink::process_token`] must be implemented; the other methods
+/// have default implementations.
+pub trait TokenSink {
+ /// The type carried by [`TokenSinkResult::Script`] in the value returned
+ /// from [`TokenSink::process_token`].
+ type Handle;
+
+ /// Process a token.
+ ///
+ /// `line_number` is supplied by the caller alongside the token.
+ // NOTE(review): presumably the input line the token was read from -- confirm.
+ fn process_token(&mut self, token: Token, line_number: u64) -> TokenSinkResult<Self::Handle>;
+
+ /// Signal sink that tokenization reached the end.
+ ///
+ /// The default implementation does nothing.
+ fn end(&mut self) {}
+
+ /// Used in the markup declaration open state. By default, this always
+ /// returns false and thus all CDATA sections are tokenized as bogus
+ /// comments.
+ /// <https://html.spec.whatwg.org/multipage/#markup-declaration-open-state>
+ fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool {
+ false
+ }
+}