diff options
Diffstat (limited to 'src/tokenizer/mod.rs')
-rw-r--r-- | src/tokenizer/mod.rs | 24 |
1 file changed, 24 insertions, 0 deletions
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index 0acdcaf..5f3d65d 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -1705,4 +1705,28 @@ mod test {
         let results = tokenize(vector, opts);
         assert_eq!(results, expected);
     }
+
+    #[test]
+    #[cfg(not(feature = "named-entities"))]
+    fn named_entities() {
+        let opts = TokenizerOpts::default();
+        let vector = vec![String::from("&\r\n"), String::from("&aamp;\r\n")];
+        let expected = vec![(Token::CharacterTokens("&\n&aamp;\n".into()), 3)];
+        let results = tokenize(vector, opts);
+        assert_eq!(results, expected);
+    }
+
+    #[test]
+    #[cfg(feature = "named-entities")]
+    fn named_entities() {
+        let opts = TokenizerOpts::default();
+        let vector = vec![String::from("&\r\n"), String::from("&aamp;\r\n")];
+        let expected = vec![
+            (CharacterTokens("&\n".into()), 3),
+            (ParseError("Invalid character reference".into()), 3),
+            (CharacterTokens("&aamp;\n".into()), 4),
+        ];
+        let results = tokenize(vector, opts);
+        assert_eq!(results, expected);
+    }
 }