diff options
Diffstat (limited to 'src/utils.rs')
-rw-r--r-- | src/utils.rs | 164 |
1 files changed, 164 insertions, 0 deletions
diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..67db1b9 --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,164 @@ +macro_rules! surrogate_pat { + () => { + 0xd800..=0xdfff + }; +} + +pub(crate) use surrogate_pat; + +macro_rules! control_pat { + () => (0x0d | 0x0000..=0x001f | 0x007f..=0x009f) +} + +pub(crate) use control_pat; + +macro_rules! ascii_digit_pat { + () => { + '0'..='9' + }; +} + +pub(crate) use ascii_digit_pat; + +macro_rules! whitespace_pat { + () => { + '\t' | '\u{0A}' | '\u{0C}' | ' ' + }; +} + +pub(crate) use whitespace_pat; + +macro_rules! noncharacter_pat { + () => { + 0xfdd0 + ..=0xfdef + | 0xfffe + | 0xffff + | 0x1fffe + | 0x1ffff + | 0x2fffe + | 0x2ffff + | 0x3fffe + | 0x3ffff + | 0x4fffe + | 0x4ffff + | 0x5fffe + | 0x5ffff + | 0x6fffe + | 0x6ffff + | 0x7fffe + | 0x7ffff + | 0x8fffe + | 0x8ffff + | 0x9fffe + | 0x9ffff + | 0xafffe + | 0xaffff + | 0xbfffe + | 0xbffff + | 0xcfffe + | 0xcffff + | 0xdfffe + | 0xdffff + | 0xefffe + | 0xeffff + | 0xffffe + | 0xfffff + | 0x10fffe + | 0x10ffff + }; +} + +pub(crate) use noncharacter_pat; + +// When integration tests are running, this enum is public and we get warnings about missing docs. +// However, it's not actually part of public API. +#[allow(missing_docs)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum State { + Data, + RcData, + RawText, + ScriptData, + PlainText, + TagOpen, + EndTagOpen, + TagName, + RcDataLessThanSign, + RcDataEndTagOpen, + RcDataEndTagName, + RawTextLessThanSign, + RawTextEndTagOpen, + RawTextEndTagName, + ScriptDataLessThanSign, + ScriptDataEndTagOpen, + ScriptDataEndTagName, + ScriptDataEscapeStart, + ScriptDataEscapeStartDash, + ScriptDataEscaped, + ScriptDataEscapedDash, + ScriptDataEscapedDashDash, + ScriptDataEscapedLessThanSign, + ScriptDataEscapedEndTagOpen, + ScriptDataEscapedEndTagName, + ScriptDataDoubleEscapeStart, + ScriptDataDoubleEscaped, + ScriptDataDoubleEscapedDash, + ScriptDataDoubleEscapedDashDash, + ScriptDataDoubleEscapedLessThanSign, + ScriptDataDoubleEscapeEnd, + BeforeAttributeName, + AttributeName, + AfterAttributeName, + BeforeAttributeValue, + AttributeValueDoubleQuoted, + AttributeValueSingleQuoted, + AttributeValueUnquoted, + AfterAttributeValueQuoted, + SelfClosingStartTag, + BogusComment, + MarkupDeclarationOpen, + CommentStart, + CommentStartDash, + Comment, + CommentLessThanSign, + CommentLessThanSignBang, + CommentLessThanSignBangDash, + CommentLessThanSignBangDashDash, + CommentEndDash, + CommentEnd, + CommentEndBang, + Doctype, + BeforeDoctypeName, + DoctypeName, + AfterDoctypeName, + AfterDoctypePublicKeyword, + BeforeDoctypePublicIdentifier, + DoctypePublicIdentifierDoubleQuoted, + DoctypePublicIdentifierSingleQuoted, + AfterDoctypePublicIdentifier, + BetweenDoctypePublicAndSystemIdentifiers, + AfterDoctypeSystemKeyword, + BeforeDoctypeSystemIdentifier, + DoctypeSystemIdentifierDoubleQuoted, + DoctypeSystemIdentifierSingleQuoted, + AfterDoctypeSystemIdentifier, + BogusDoctype, + CdataSection, + CdataSectionBracket, + CdataSectionEnd, + CharacterReference, + NamedCharacterReference, + AmbiguousAmpersand, + NumericCharacterReference, + HexadecimalCharacterReferenceStart, + DecimalCharacterReferenceStart, + HexadecimalCharacterReference, + DecimalCharacterReference, + NumericCharacterReferenceEnd, +} + +pub enum ControlToken { + Eof, + Continue, +} |