diff options
| author | Martin Fischer <martin@push-f.com> | 2021-11-19 08:38:57 +0100 | 
|---|---|---|
| committer | Martin Fischer <martin@push-f.com> | 2021-11-19 08:38:58 +0100 | 
| commit | 98ad8cec144900c7799772b3a53241825b416b4f (patch) | |
| tree | 8a18df3e9ff4c4be5316112c0f02db72f9ef94f2 /src/tokenizer/char_ref | |
| parent | 7207abccd9dccb15eb37f43a8f763cac99be14d4 (diff) | |
feature gate named-entities (making phf optional)
Diffstat (limited to 'src/tokenizer/char_ref')
| -rw-r--r-- | src/tokenizer/char_ref/data.rs | 2 |
| -rw-r--r-- | src/tokenizer/char_ref/mod.rs | 25 |
2 files changed, 25 insertions, 2 deletions
| diff --git a/src/tokenizer/char_ref/data.rs b/src/tokenizer/char_ref/data.rs index fa839ba..9487034 100644 --- a/src/tokenizer/char_ref/data.rs +++ b/src/tokenizer/char_ref/data.rs @@ -7,6 +7,7 @@  // option. This file may not be copied, modified, or distributed  // except according to those terms.  //! Data that is known at compile-time and hard-coded into the binary. +#[cfg(feature = "named-entities")]  use phf::Map;  /// The spec replaces most characters in the ISO-2022 C1 control code range @@ -47,4 +48,5 @@ pub static C1_REPLACEMENTS: [Option<char>; 32] = [      Some('\u{0178}'),  ]; +#[cfg(feature = "named-entities")]  include!(concat!(env!("OUT_DIR"), "/named_entities.rs")); diff --git a/src/tokenizer/char_ref/mod.rs b/src/tokenizer/char_ref/mod.rs index 41f4c13..9c01bdf 100644 --- a/src/tokenizer/char_ref/mod.rs +++ b/src/tokenizer/char_ref/mod.rs @@ -40,6 +40,7 @@ enum State {      Octothorpe,      Numeric(u32), // base      NumericSemicolon, +    #[cfg(feature = "named-entities")]      Named,      BogusName,  } @@ -55,7 +56,9 @@ pub struct CharRefTokenizer {      hex_marker: Option<char>,      name_buf_opt: Option<String>, +    #[cfg(feature = "named-entities")]      name_match: Option<(u32, u32)>, +    #[cfg(feature = "named-entities")]      name_len: usize,  } @@ -72,7 +75,9 @@ impl CharRefTokenizer {              seen_digit: false,              hex_marker: None,              name_buf_opt: None, +            #[cfg(feature = "named-entities")]              name_match: None, +            #[cfg(feature = "named-entities")]              name_len: 0,          }      } @@ -83,6 +88,7 @@ impl CharRefTokenizer {          self.result.expect("get_result called before done")      } +    #[cfg(feature = "named-entities")]      fn name_buf(&self) -> &str {          self.name_buf_opt              .as_ref() @@ -127,6 +133,7 @@ impl CharRefTokenizer {              Octothorpe => self.do_octothorpe(tokenizer, input),              Numeric(base) => 
self.do_numeric(tokenizer, input, base),              NumericSemicolon => self.do_numeric_semicolon(tokenizer, input), +            #[cfg(feature = "named-entities")]              Named => self.do_named(tokenizer, input),              BogusName => self.do_bogus_name(tokenizer, input),          } @@ -148,7 +155,14 @@ impl CharRefTokenizer {              }              _ => { -                self.state = Named; +                #[cfg(feature = "named-entities")] +                { +                    self.state = Named; +                } +                #[cfg(not(feature = "named-entities"))] +                { +                    self.state = BogusName; +                }                  self.name_buf_opt = Some(String::new());                  Progress              } @@ -270,6 +284,7 @@ impl CharRefTokenizer {          self.finish_one(c)      } +    #[cfg(feature = "named-entities")]      fn do_named<Sink: TokenSink>(          &mut self,          tokenizer: &mut Tokenizer<Sink>, @@ -294,6 +309,7 @@ impl CharRefTokenizer {          }      } +    #[cfg(feature = "named-entities")]      fn emit_name_error<Sink: TokenSink>(&mut self, tokenizer: &mut Tokenizer<Sink>) {          let msg = format_if!(              tokenizer.opts.exact_errors, @@ -308,6 +324,7 @@ impl CharRefTokenizer {          input.push_front(self.name_buf_opt.take().unwrap());      } +    #[cfg(feature = "named-entities")]      fn finish_named<Sink: TokenSink>(          &mut self,          tokenizer: &mut Tokenizer<Sink>, @@ -405,7 +422,10 @@ impl CharRefTokenizer {          self.name_buf_mut().push(c);          match c {              _ if is_ascii_alnum(c) => return Progress, -            ';' => self.emit_name_error(tokenizer), +            ';' => { +                #[cfg(feature = "named-entities")] +                self.emit_name_error(tokenizer); +            }              _ => (),          }          self.unconsume_name(input); @@ -428,6 +448,7 @@ impl CharRefTokenizer {                     
 self.finish_numeric(tokenizer);                  } +                #[cfg(feature = "named-entities")]                  Named => drop(self.finish_named(tokenizer, input, None)),                  BogusName => { | 
