// Copyright 2014-2017 The html5ever Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. use crate::error::Error; use crate::tokenizer::states; #[cfg(feature = "spans")] use std::ops::Range; pub use self::TagKind::{EndTag, StartTag}; pub use self::Token::{CharacterTokens, CommentToken, DoctypeToken, TagToken}; pub use self::Token::{EOFToken, NullCharacterToken, ParseError}; /// A `DOCTYPE` token. // FIXME: already exists in Servo DOM #[derive(PartialEq, Eq, Clone, Debug)] pub struct Doctype { pub name: Option, pub public_id: Option, pub system_id: Option, pub force_quirks: bool, } impl Doctype { pub fn new() -> Doctype { Doctype { name: None, public_id: None, system_id: None, force_quirks: false, } } } #[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)] pub enum TagKind { StartTag, EndTag, } /// A tag attribute, e.g. `class="test"` in `
`. /// /// The namespace on the attribute name is almost always ns!(""). /// The tokenizer creates all attributes this way, but the tree /// builder will adjust certain attribute names inside foreign /// content (MathML, SVG). #[derive(PartialEq, Eq, Clone, Debug)] pub struct Attribute { /// The name of the attribute (e.g. the `class` in `
`) pub name: String, /// The value of the attribute (e.g. the `"test"` in `
`) pub value: String, #[cfg(feature = "spans")] #[cfg_attr(docsrs, doc(cfg(feature = "spans")))] pub name_span: Range, #[cfg(feature = "spans")] #[cfg_attr(docsrs, doc(cfg(feature = "spans")))] pub value_span: Range, } impl Ord for Attribute { fn cmp(&self, other: &Self) -> std::cmp::Ordering { (&self.name, &self.value).cmp(&(&other.name, &other.value)) } } impl PartialOrd for Attribute { fn partial_cmp(&self, other: &Self) -> Option { (&self.name, &self.value).partial_cmp(&(&other.name, &other.value)) } } /// A tag token. #[derive(PartialEq, Eq, Clone, Debug)] pub struct Tag { pub kind: TagKind, pub name: String, pub self_closing: bool, pub attrs: Vec, #[cfg(feature = "spans")] #[cfg_attr(docsrs, doc(cfg(feature = "spans")))] pub name_span: Range, } impl Tag { /// Are the tags equivalent when we don't care about attribute order? /// Also ignores the self-closing flag and spans. pub fn equiv_modulo_attr_order(&self, other: &Tag) -> bool { if (self.kind != other.kind) || (self.name != other.name) { return false; } let mut self_attrs = self.attrs.clone(); let mut other_attrs = other.attrs.clone(); self_attrs.sort(); other_attrs.sort(); self_attrs == other_attrs } } #[derive(PartialEq, Eq, Debug)] pub enum Token { DoctypeToken(Doctype), TagToken(Tag), CommentToken(String), CharacterTokens(String), NullCharacterToken, EOFToken, ParseError(Error), } #[derive(Debug, PartialEq)] #[must_use] pub enum TokenSinkResult { Continue, Break, Plaintext, RawData(states::RawKind), } /// Types which can receive tokens from the tokenizer. pub trait TokenSink { /// Process a token. fn process_token(&mut self, token: Token, line_number: u64) -> TokenSinkResult; // Signal sink that tokenization reached the end. fn end(&mut self) {} /// Used in the markup declaration open state. By default, this always /// returns false and thus all CDATA sections are tokenized as bogus /// comments. /// fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool { false } }