aboutsummaryrefslogtreecommitdiff
path: root/src/tokenizer.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/tokenizer.rs')
-rw-r--r--src/tokenizer.rs28
1 files changed, 25 insertions, 3 deletions
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 78d4fc4..7768ee4 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -41,6 +41,9 @@ pub struct Tokenizer<R: Reader, E: Emitter<R> = DefaultEmitter<R, ()>> {
to_reconsume: Stack2<Option<char>>,
pub(crate) character_reference_code: u32,
pub(crate) return_state: Option<InternalState>,
+ current_tag_name: String,
+ last_start_tag_name: String,
+ is_start_tag: bool,
}
impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {
@@ -57,6 +60,9 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {
temporary_buffer: String::new(),
character_reference_code: 0,
eof: false,
+ current_tag_name: String::new(),
+ last_start_tag_name: String::new(),
+ is_start_tag: false,
}
}
}
@@ -115,29 +121,44 @@ impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {
self.emitter.emit_error(error, &self.reader);
}
+ /// Assuming the _current token_ is an end tag, return true if all of these hold. Return false otherwise.
+ ///
+ /// * the _last start tag_ exists
+ /// * the current end tag token's name equals the last start tag's name.
+ ///
+ /// See also [WHATWG's definition of "appropriate end tag
+ /// token"](https://html.spec.whatwg.org/#appropriate-end-tag-token).
#[inline]
pub(crate) fn current_end_tag_is_appropriate(&mut self) -> bool {
- self.emitter.current_is_appropriate_end_tag_token()
+ self.current_tag_name == self.last_start_tag_name
}
#[inline]
pub(crate) fn init_start_tag(&mut self) {
self.emitter.init_start_tag(&self.reader);
+ self.current_tag_name.clear();
+ self.is_start_tag = true;
}
#[inline]
pub(crate) fn init_end_tag(&mut self) {
self.emitter.init_end_tag(&self.reader);
+ self.current_tag_name.clear();
+ self.is_start_tag = false;
}
#[inline]
pub(crate) fn push_tag_name(&mut self, s: &str) {
self.emitter.push_tag_name(s);
+ self.current_tag_name.push_str(s);
}
#[inline]
pub(crate) fn emit_current_tag(&mut self) {
self.emitter.emit_current_tag();
+ if self.is_start_tag {
+ std::mem::swap(&mut self.last_start_tag_name, &mut self.current_tag_name);
+ }
}
#[inline]
@@ -271,12 +292,13 @@ impl<R: Reader, E: Emitter<R>> Iterator for Tokenizer<R, E> {
}
}
-impl<S: crate::spans::Span<R>, R: Reader> Tokenizer<R, DefaultEmitter<R, S>> {
+impl<R: Reader, E: Emitter<R>> Tokenizer<R, E> {
/// Test-internal function to override internal state.
///
/// Only available with the `integration-tests` feature which is not public API.
#[cfg(feature = "integration-tests")]
pub fn set_last_start_tag(&mut self, last_start_tag: &str) {
- self.emitter.set_last_start_tag(Some(last_start_tag));
+ self.last_start_tag_name.clear();
+ self.last_start_tag_name.push_str(last_start_tag);
}
}