#![deny(missing_docs)]
// This is an HTML parser. HTML can be untrusted input from the internet.
#![forbid(unsafe_code)]
#![doc = include_str!("../README.md")]
mod emitter;
mod entities;
mod error;
mod machine;
mod never;
mod reader;
#[cfg(feature = "integration-tests")]
pub use machine::State;
#[cfg(not(feature = "integration-tests"))]
use machine::State;
use machine::{
ascii_digit_pat, control_pat, noncharacter_pat, surrogate_pat, whitespace_pat, ControlToken,
};
pub use emitter::{DefaultEmitter, Doctype, Emitter, EndTag, StartTag, Token};
pub use error::Error;
pub use never::Never;
pub use reader::{BufReadReader, Readable, Reader, StringReader};
// Borrows a `char` expression as a short-lived `&str`.
//
// The character is UTF-8 encoded into a temporary 4-byte buffer (the maximum
// length of one encoded scalar value), so the resulting slice is only valid
// within the statement in which the macro is expanded.
macro_rules! ctostr {
    ($ch:expr) => (
        &*$ch.encode_utf8(&mut [0u8; 4])
    );
}
/// A tiny fixed-capacity LIFO stack that can hold zero, one or two `T`s.
///
/// Representation of the wrapped value:
/// * `None` -- the stack is empty,
/// * `Some((bottom, None))` -- one element,
/// * `Some((bottom, Some(top)))` -- two elements (full).
#[derive(Debug, Default)]
struct Stack2<T>(Option<(T, Option<T>)>);

// `T: Copy` is required because the methods match on the stored tuple by value.
impl<T: Copy> Stack2<T> {
    /// Pushes `c` on top of the stack.
    ///
    /// # Panics
    ///
    /// Panics with `"stack full!"` if the stack already holds two elements.
    /// The stack contents are left untouched in that case.
    #[inline]
    fn push(&mut self, c: T) {
        self.0 = match self.0 {
            None => Some((c, None)),
            Some((bottom, None)) => Some((bottom, Some(c))),
            Some((_, Some(_))) => panic!("stack full!"),
        }
    }

    /// Removes and returns the most recently pushed element, or `None` if the
    /// stack is empty.
    #[inline]
    fn pop(&mut self) -> Option<T> {
        // `take` leaves the stack empty; only the two-element case has to put
        // the bottom element back.
        match self.0.take() {
            Some((bottom, Some(top))) => {
                self.0 = Some((bottom, None));
                Some(top)
            }
            Some((bottom, None)) => Some(bottom),
            None => None,
        }
    }

    /// Returns `true` if the stack holds no elements.
    #[inline]
    fn is_empty(&self) -> bool {
        self.0.is_none()
    }
}
/// A HTML tokenizer. See crate-level docs for basic usage.
pub struct Tokenizer {
eof: bool,
state: State,
emitter: E,
temporary_buffer: String,
reader: R,
to_reconsume: Stack2