diff options
| -rw-r--r-- | CHANGELOG.md | 4 | ||||
| -rw-r--r-- | src/basic_emitter.rs | 2 | ||||
| -rw-r--r-- | src/emitter.rs | 2 | ||||
| -rw-r--r-- | src/tokenizer.rs | 4 | ||||
| -rw-r--r-- | src/tokenizer/machine/utils.rs | 4 | ||||
| -rw-r--r-- | src/trace.rs | 2 | ||||
| -rw-r--r-- | src/tracing_emitter.rs | 4 | ||||
| -rw-r--r-- | tests/test_spans.rs | 18 | 
8 files changed, 34 insertions, 6 deletions
| diff --git a/CHANGELOG.md b/CHANGELOG.md index 52de087..5c6cc58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@  #### Features +* Added offsets for end-of-file tokens. +  * Added a blanket implementation to implement `Reader` for boxed readers.  #### Breaking changes @@ -35,6 +37,8 @@    * Replaced `emit_string` with `emit_char`. +  * Added an offset parameter to `emit_eof`. +  * `NaiveParser`: Removed `new_with_spans`.  ### 0.5.1 - 2023-09-03 diff --git a/src/basic_emitter.rs b/src/basic_emitter.rs index e67447b..0d37810 100644 --- a/src/basic_emitter.rs +++ b/src/basic_emitter.rs @@ -60,7 +60,7 @@ impl<O: Offset> Emitter<O> for BasicEmitter<O> {          self.emit_token(Token::Char(c));      } -    fn emit_eof(&mut self) { +    fn emit_eof(&mut self, offset: O) {          self.emit_token(Token::EndOfFile);      } diff --git a/src/emitter.rs b/src/emitter.rs index d1e1dfe..264d2f1 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -31,7 +31,7 @@ pub trait Emitter<O> {      fn emit_char(&mut self, c: char);      /// The state machine has reached the end of the file. -    fn emit_eof(&mut self); +    fn emit_eof(&mut self, offset: O);      /// Set the _current token_ to a start tag.      fn init_start_tag(&mut self, tag_offset: O, name_offset: O); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 3359637..b41c208 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -171,7 +171,9 @@ where                  Ok(ControlToken::Continue) => (),                  Ok(ControlToken::Eof) => {                      self.eof = true; -                    self.machine.emitter.emit_eof(); +                    self.machine +                        .emitter +                        .emit_eof(self.machine.reader_position());                  }                  Ok(ControlToken::CdataOpen) => return Some(Ok(Event::CdataOpen)),              } diff --git a/src/tokenizer/machine/utils.rs b/src/tokenizer/machine/utils.rs index ea4d697..9752746 100644 --- a/src/tokenizer/machine/utils.rs +++ b/src/tokenizer/machine/utils.rs @@ -13,6 +13,10 @@ where      O: Offset,      E: Emitter<O>,  { +    pub(crate) fn reader_position(&self) -> O { +        self.reader.position() +    } +      /// Emits the given character as a character token.      #[inline]      pub(super) fn emit_char(&mut self, c: char) { diff --git a/src/trace.rs b/src/trace.rs index a816429..620d4f3 100644 --- a/src/trace.rs +++ b/src/trace.rs @@ -19,7 +19,7 @@ pub enum Trace {      EndTag(EndTagTrace),      Comment(CommentTrace),      Doctype(DoctypeTrace), -    EndOfFile, +    EndOfFile(usize),  }  /// Provides byte offsets and syntax information for a [`StartTag`] token. diff --git a/src/tracing_emitter.rs b/src/tracing_emitter.rs index 408e832..819f909 100644 --- a/src/tracing_emitter.rs +++ b/src/tracing_emitter.rs @@ -66,8 +66,8 @@ impl Emitter<usize> for TracingEmitter {          self.emit_token(Token::Char(c), Trace::Char);      } -    fn emit_eof(&mut self) { -        self.emit_token(Token::EndOfFile, Trace::EndOfFile); +    fn emit_eof(&mut self, offset: usize) { +        self.emit_token(Token::EndOfFile, Trace::EndOfFile(offset));      }      fn init_start_tag(&mut self, tag_offset: usize, name_offset: usize) { diff --git a/tests/test_spans.rs b/tests/test_spans.rs index 0e95be0..d19d6aa 100644 --- a/tests/test_spans.rs +++ b/tests/test_spans.rs @@ -351,6 +351,24 @@ fn doctype_id_spans() {      "###);  } +#[test] +fn eof_offset() { +    let html = "Where does it end?"; +    let labeler = |parser: Parser| { +        let mut labels = Vec::new(); +        for (_, trace) in parser.flatten() { +            if let Trace::EndOfFile(offset) = trace { +                labels.push((offset..offset, "here")); +            } +        } +        labels +    }; +    assert_snapshot!(test_and_annotate(html, labeler), @r###" +    Where does it end? +                      ^ here +    "###); +} +  fn annotate_errors(html: &'static str) -> String {      let mut parser = parser(html);      for _ in parser.by_ref() {} | 
