// src/emitter.rs

use std::ops::Range;

use crate::token::AttrValueSyntax;
use crate::Error;

/// Receiver of the tokens that the tokenizer produces ("emits").
///
/// Domain-specific users of the HTML tokenizer can implement this trait by hand in order to
/// customize per-token allocations, or to avoid them entirely.
///
/// Every emitter is assumed to keep track of the following internal state:
///
/// * the _current token_: a tag, doctype or comment token. There is only ever one current token.
/// * the _current attribute_: the HTML attribute being processed, made up of two strings (its
///   name and its value).
///
/// The method documentation below describes the behavior the WHATWG spec expects. An
/// implementation is not obliged to follow it. For example:
///
/// * If your use of the tokenizer ignores all errors, none of the error handling and validation
///   requirements apply to you: `emit_error` can be a noop and every check that would emit an
///   error can be left out.
///
/// * If you do not care about attributes at all, every attribute-related method can be a noop.
#[allow(unused_variables)] // workaround for https://github.com/rust-lang/rust/issues/91074
pub trait Emitter<O> {
    // ---- Output that does not involve the current token ----

    /// The state machine has reached the end of the file.
    fn emit_eof(&mut self);

    /// A (probably recoverable) parsing error has occurred.
    fn emit_error(&mut self, error: Error, span: Range<O>);

    /// Emit a run of plain characters as character tokens.
    fn emit_string(&mut self, c: &str);

    // ---- Start and end tags ----

    /// Make a start tag the _current token_.
    fn init_start_tag(&mut self, tag_offset: O, name_offset: O);

    /// Make an end tag the _current token_.
    fn init_end_tag(&mut self, tag_offset: O, name_offset: O);

    /// Append a string to the name of the current tag, assuming the _current token_ is a
    /// start/end tag.
    ///
    /// This method may panic if the current token is not a start or end tag.
    fn push_tag_name(&mut self, s: &str);

    /// Called after the last [`push_tag_name`](Self::push_tag_name) call for a tag name.
    ///
    /// The default implementation does nothing.
    fn terminate_tag_name(&mut self, offset: O) {}

    /// Set the self-closing flag, assuming the _current token_ is a start tag.
    ///
    /// This method may panic if the current token is not a start or end tag.
    ///
    /// If the current token is an end tag, the emitter should emit the
    /// [`Error::EndTagWithTrailingSolidus`] error.
    fn set_self_closing(&mut self, slash_span: Range<O>);

    /// Emit the _current token_, assuming it is a tag.
    ///
    /// The current attribute should also be taken and appended to the tag that is about to be
    /// emitted. See the documentation of [`Emitter::init_attribute_name`] for how duplicates
    /// should be handled.
    ///
    /// If an end tag is emitted with attributes, an [`Error::EndTagWithAttributes`]
    /// error should be emitted.
    ///
    /// This method may panic if the current token is not a start/end tag.
    fn emit_current_tag(&mut self, offset: O);

    // ---- Attributes of the current tag ----

    /// Replace the _current attribute_ with a new one whose name and value are both empty.
    ///
    /// The previous attribute, if there is one, should be put on the _current token_. If an
    /// attribute with that name already exists, WHATWG says the new one should be ignored and a
    /// [`Error::DuplicateAttribute`] error should be emitted.
    ///
    /// This method may panic if the current token is not a tag.
    fn init_attribute_name(&mut self, offset: O);

    /// Append a string to the name of the current attribute.
    ///
    /// This method may panic if there is no current attribute.
    fn push_attribute_name(&mut self, s: &str);

    /// Called after the last [`push_attribute_name`](Self::push_attribute_name) call for an
    /// attribute name.
    ///
    /// The default implementation does nothing.
    fn terminate_attribute_name(&mut self, offset: O) {}

    /// Called before the first [`push_attribute_value`](Self::push_attribute_value) call.
    ///
    /// This method may panic if there is no current attribute.
    ///
    /// The default implementation does nothing.
    fn init_attribute_value(&mut self, syntax: AttrValueSyntax, offset: O) {}

    /// Append a string to the value of the current attribute.
    ///
    /// This method may panic if there is no current attribute.
    fn push_attribute_value(&mut self, s: &str);

    /// Called after the last [`push_attribute_value`](Self::push_attribute_value) call for an
    /// attribute value.
    ///
    /// The default implementation does nothing.
    fn terminate_attribute_value(&mut self, offset: O) {}

    // ---- Comments ----

    /// Make a comment the _current token_.
    fn init_comment(&mut self, data_start_offset: O);

    /// Append a string to the contents of the comment, assuming the _current token_ is a
    /// comment.
    ///
    /// This method may panic if the current token is not a comment.
    fn push_comment(&mut self, s: &str);

    /// Emit the _current token_, assuming it is a comment.
    ///
    /// This method may panic if the current token is not a comment.
    fn emit_current_comment(&mut self, data_end_offset: O);

    // ---- Doctypes ----

    /// Make a new doctype token the _current token_:
    ///
    /// * its name should be empty
    /// * its "public identifier" should be null (which is different from empty)
    /// * its "system identifier" should be null (which is different from empty)
    /// * its "force quirks" flag should be `false`
    fn init_doctype(&mut self, offset: O);

    /// Set the name of the doctype to the empty string, assuming the _current token_ is a
    /// doctype.
    ///
    /// This method may panic if the current token is not a doctype.
    ///
    /// The default implementation does nothing.
    fn init_doctype_name(&mut self, offset: O) {}

    /// Append a string to the name of the doctype, assuming the _current token_ is a doctype.
    ///
    /// This method may panic if the current token is not a doctype.
    fn push_doctype_name(&mut self, s: &str);

    /// Called after the last [`push_doctype_name`](Self::push_doctype_name) call for a DOCTYPE
    /// name.
    ///
    /// The default implementation does nothing.
    fn terminate_doctype_name(&mut self, offset: O) {}

    /// Set the "public identifier" of the doctype to the empty string, assuming the
    /// _current token_ is a doctype.
    ///
    /// This method may panic if the current token is not a doctype.
    fn init_doctype_public_id(&mut self, offset: O);

    /// Append a string to the "public identifier" of the doctype, assuming the _current token_
    /// is a doctype.
    ///
    /// This method may panic if the current token is not a doctype.
    fn push_doctype_public_id(&mut self, s: &str);

    /// Called after the last [`push_doctype_public_id`](Self::push_doctype_public_id) call for a
    /// DOCTYPE public identifier.
    ///
    /// The default implementation does nothing.
    fn terminate_doctype_public_id(&mut self, offset: O) {}

    /// Set the "system identifier" of the doctype to the empty string, assuming the
    /// _current token_ is a doctype.
    ///
    /// This method may panic if the current token is not a doctype.
    fn init_doctype_system_id(&mut self, offset: O);

    /// Append a string to the "system identifier" of the doctype, assuming the _current token_
    /// is a doctype.
    ///
    /// This method may panic if the current token is not a doctype.
    fn push_doctype_system_id(&mut self, s: &str);

    /// Called after the last [`push_doctype_system_id`](Self::push_doctype_system_id) call for a
    /// DOCTYPE system identifier.
    ///
    /// The default implementation does nothing.
    fn terminate_doctype_system_id(&mut self, offset: O) {}

    /// Set the "force quirks" flag of the doctype to true, assuming the _current token_ is a
    /// doctype.
    ///
    /// This method may panic if the current token is not a doctype.
    fn set_force_quirks(&mut self);

    /// Emit the _current token_, assuming it is a doctype.
    ///
    /// This method may panic if the current token is not a doctype.
    fn emit_current_doctype(&mut self, offset: O);
}