Diffstat (limited to 'src/lua/template.rs')
-rw-r--r-- | src/lua/template.rs | 333 |
1 file changed, 333 insertions, 0 deletions
diff --git a/src/lua/template.rs b/src/lua/template.rs
new file mode 100644
index 0000000..de13481
--- /dev/null
+++ b/src/lua/template.rs
@@ -0,0 +1,333 @@
+use logos::Logos;
+
+use super::ModuleFunction;
+
+#[derive(Logos, Debug, PartialEq)]
+pub(crate) enum Token {
+    #[token("{{")]
+    OpenCall,
+
+    #[token("\\}}")]
+    EscapedCloseCall,
+
+    #[token("}}")]
+    CloseCall,
+
+    #[regex("<pre( +[^>]*)?>")]
+    OpenPre,
+
+    #[regex("</pre>")]
+    ClosePre,
+
+    #[regex("<raw>")]
+    OpenRaw,
+
+    #[regex("</raw>")]
+    CloseRaw,
+
+    #[token("<!--")]
+    OpenComment,
+
+    #[token("-->")]
+    CloseComment,
+
+    #[error]
+    Other,
+}
+
+fn strip_marker(num: usize) -> String {
+    format!("\x7fUNIQ-{:05X}-QINU\x7f", num)
+}
+
+#[derive(Logos, Debug, PartialEq)]
+pub(crate) enum MarkerToken {
+    #[regex("\x7fUNIQ-[A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9]-QINU\x7f")]
+    StripMarker,
+
+    #[error]
+    Other,
+}
+
+#[derive(Debug)]
+pub enum TokenType {
+    OpenPre,
+    ClosePre,
+    OpenRaw,
+    CloseRaw,
+    OpenCall,
+    CloseCall,
+}
+
+#[derive(Debug)]
+pub(crate) enum Sub<'a> {
+    Call(&'a str),
+    Raw(&'a str),
+    Pre { attrs: &'a str, content: &'a str },
+    StrayToken(TokenType, &'a str),
+}
+
+pub(crate) struct ParserState<'a> {
+    substitutions: Vec<Sub<'a>>,
+}
+
+impl<'a> ParserState<'a> {
+    fn new() -> Self {
+        Self {
+            substitutions: Vec::new(),
+        }
+    }
+
+    fn add(&mut self, sub: Sub<'a>) -> usize {
+        let idx = self.substitutions.len();
+        self.substitutions.push(sub);
+        idx
+    }
+
+    pub(crate) fn postprocess(
+        &self,
+        text: &str,
+        render: impl Fn(&Sub<'a>, &mut String) -> std::fmt::Result,
+    ) -> String {
+        let mut out = String::new();
+        let mut lex = MarkerToken::lexer(text);
+        while let Some(tok) = lex.next() {
+            match tok {
+                MarkerToken::StripMarker => match usize::from_str_radix(&lex.slice()[6..11], 16) {
+                    Ok(idx) => {
+                        render(&self.substitutions[idx], &mut out);
+                    }
+                    Err(e) => out.push_str(lex.slice()),
+                },
+                MarkerToken::Other => out.push_str(lex.slice()),
+            }
+        }
+        out.push_str(lex.remainder());
+        out
+    }
+}
+
+pub(crate) fn preprocess(text: &str) -> (String, ParserState) {
+    let mut stripped = String::new();
+    let mut lex = Token::lexer(text);
+    let mut state = ParserState::new();
+
+    while let Some(tok) = lex.next() {
+        match tok {
+            Token::OpenComment => {
+                let span = lex.span();
+
+                stripped.push_str(if lex.any(|t| matches!(&t, Token::CloseComment)) {
+                    &lex.source()[span.start..lex.span().end]
+                } else {
+                    &lex.source()[span.start..]
+                });
+            }
+            Token::OpenRaw => {
+                let span = lex.span();
+                let slice = lex.slice();
+
+                stripped.push_str(&strip_marker(state.add(
+                    if lex.any(|t| matches!(&t, Token::CloseRaw)) {
+                        Sub::Raw(&lex.source()[span.end..lex.span().start])
+                    } else {
+                        lex = Token::lexer(&lex.source()[span.end..]);
+                        Sub::StrayToken(TokenType::OpenRaw, slice)
+                    },
+                )));
+            }
+            Token::OpenCall => {
+                let span = lex.span();
+                let slice = lex.slice();
+
+                stripped.push_str(&strip_marker(state.add(
+                    if lex.any(|t| matches!(&t, Token::CloseCall)) {
+                        Sub::Call(&lex.source()[span.end..lex.span().start])
+                    } else {
+                        lex = Token::lexer(&lex.source()[span.end..]);
+                        Sub::StrayToken(TokenType::OpenCall, slice)
+                    },
+                )));
+            }
+            Token::OpenPre => {
+                let span = lex.span();
+                let slice = lex.slice();
+
+                stripped.push_str(&strip_marker(state.add(
+                    if lex.any(|t| matches!(&t, Token::ClosePre)) {
+                        Sub::Pre {
+                            attrs: &lex.source()[span.start + 4..span.end - 1],
+                            content: &lex.source()[span.end..lex.span().start],
+                        }
+                    } else {
+                        lex = Token::lexer(&lex.source()[span.end..]);
+                        Sub::StrayToken(TokenType::OpenPre, slice)
+                    },
+                )));
+            }
+            Token::CloseCall => {
+                stripped.push_str(&strip_marker(
+                    state.add(Sub::StrayToken(TokenType::CloseCall, lex.slice())),
+                ));
+            }
+            Token::ClosePre => {
+                stripped.push_str(&strip_marker(
+                    state.add(Sub::StrayToken(TokenType::ClosePre, lex.slice())),
+                ));
+            }
+            Token::CloseRaw => {
+                stripped.push_str(&strip_marker(
+                    state.add(Sub::StrayToken(TokenType::CloseRaw, lex.slice())),
+                ));
+            }
+            _ => {
+                stripped.push_str(lex.slice());
+            }
+        }
+    }
+    stripped.push_str(lex.remainder());
+
+    (stripped, state)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fmt::Write;
+
+    use crate::lua::template::preprocess;
+
+    use super::Sub;
+
+    fn debug_format(sub: &Sub, out: &mut String) -> std::fmt::Result {
+        write!(out, "{:?}", sub)
+    }
+
+    #[test]
+    fn test_parse() {
+        let (text, state) = preprocess("{{foobar}} <pre lang=js>foo</pre> <raw>{{test</raw>}}");
+        let out = state.postprocess(&text, debug_format);
+        assert_eq!(
+            out,
+            r#"Call("foobar") Pre { attrs: " lang=js", content: "foo" } Raw("{{test")StrayToken(CloseCall, "}}")"#
+        );
+    }
+
+    #[test]
+    fn test_stray_open_tokens() {
+        let (text, state) = preprocess("{{ <pre> <raw>");
+        let out = state.postprocess(&text, debug_format);
+        assert_eq!(
+            out,
+            r#"StrayToken(OpenCall, "{{") StrayToken(OpenPre, "<pre>") StrayToken(OpenRaw, "<raw>")"#
+        );
+    }
+
+    #[test]
+    fn test_stray_close_tokens() {
+        let (text, state) = preprocess("}} </pre> </raw>");
+        let out = state.postprocess(&text, debug_format);
+        assert_eq!(
+            out,
+            r#"StrayToken(CloseCall, "}}") StrayToken(ClosePre, "</pre>") StrayToken(CloseRaw, "</raw>")"#
+        );
+    }
+
+    #[test]
+    fn test_comment() {
+        let (text, _state) =
+            preprocess("<!-- {{foobar}} <pre lang=js>foo</pre> <raw>{{test</raw>}}");
+        assert_eq!(
+            text,
+            r#"<!-- {{foobar}} <pre lang=js>foo</pre> <raw>{{test</raw>}}"#
+        );
+    }
+
+    #[test]
+    fn test_call_after_stray_open() {
+        let (text, state) = preprocess("<pre> {{foo}}");
+        let out = state.postprocess(&text, debug_format);
+        assert_eq!(out, r#"StrayToken(OpenPre, "<pre>") Call("foo")"#);
+    }
+}
+
+#[derive(Logos, Debug, PartialEq)]
+enum CallInnerToken {
+    #[token("\\|")]
+    EscapedPipe,
+
+    #[regex("\\}}")]
+    EscapedClose,
+
+    #[token("|")]
+    Pipe,
+
+    #[error]
+    Other,
+}
+
+#[derive(Debug)]
+pub enum ArgIndex {
+    Str(String),
+    Num(usize),
+}
+
+fn parse_arg(text: &str, cur_idx: &mut usize) -> (ArgIndex, String) {
+    if let Some((key, val)) = text.split_once('=') {
+        let key = key.trim();
+        let idx = match key.parse::<usize>() {
+            Ok(n) => ArgIndex::Num(n),
+            Err(_) => ArgIndex::Str(key.to_string()),
+        };
+        (idx, val.trim().to_string())
+    } else {
+        let ret = (ArgIndex::Num(*cur_idx), text.trim().to_string());
+        *cur_idx += 1;
+        ret
+    }
+}
+
+pub fn parse_call_args(text: &str) -> (ModuleFunction, std::vec::IntoIter<(ArgIndex, String)>) {
+    let mut args = Vec::new();
+    let mut arg = String::new();
+
+    let mut iter = text.splitn(2, '|');
+    let arg0 = iter.next().unwrap();
+
+    let mut idx = 1;
+
+    if let Some(rest) = iter.next() {
+        let mut lexer = CallInnerToken::lexer(rest);
+        while let Some(tok) = lexer.next() {
+            match tok {
+                CallInnerToken::EscapedClose => arg.push_str("}}"),
+                CallInnerToken::EscapedPipe => arg.push('|'),
+                CallInnerToken::Other => arg.push_str(lexer.slice()),
+                CallInnerToken::Pipe => {
+                    args.push(parse_arg(&arg, &mut idx));
+                    arg = String::new();
+                    idx += 1;
+                }
+            }
+        }
+        arg.push_str(lexer.remainder());
+        args.push(parse_arg(&arg, &mut idx));
+    }
+
+    let mut parts = arg0.splitn(2, '.');
+
+    let module_name;
+    let mut function_name = parts.next().unwrap();
+    if let Some(next) = parts.next() {
+        module_name = function_name;
+        function_name = next;
+    } else {
+        module_name = "default";
+    }
+
+    (
+        ModuleFunction {
+            module_name,
+            function_name,
+        },
+        args.into_iter(),
+    )
+}
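
For orientation, here is a minimal sketch (not part of the commit above) of how these pieces might be wired together from elsewhere in the crate, since preprocess, postprocess, and Sub are pub(crate). The expand_templates name, the rendering chosen for each Sub variant, and direct access to ModuleFunction's module_name/function_name fields are assumptions for illustration only; the real caller would presumably hand the result of parse_call_args to the Lua module dispatcher instead of echoing it.

use std::fmt::Write;

use crate::lua::template::{parse_call_args, preprocess, Sub};

// Hypothetical helper: expand a page of template markup in two passes.
fn expand_templates(input: &str) -> String {
    // Pass 1: comments are passed through, and every {{...}} call,
    // <pre>...</pre>, <raw>...</raw> or stray token is recorded in the
    // ParserState and replaced by a UNIQ strip marker.
    let (stripped, state) = preprocess(input);

    // Pass 2: walk the marked text and render each recorded substitution.
    state.postprocess(&stripped, |sub, out| match sub {
        Sub::Call(inner) => {
            let (func, args) = parse_call_args(inner);
            // Illustrative rendering only: echo the call target and its
            // arguments instead of invoking a Lua module function.
            write!(out, "[{}.{}", func.module_name, func.function_name)?;
            for (idx, val) in args {
                write!(out, " {:?}={}", idx, val)?;
            }
            write!(out, "]")
        }
        Sub::Raw(text) => write!(out, "{}", text),
        Sub::Pre { attrs, content } => write!(out, "<pre{}>{}</pre>", attrs, content),
        Sub::StrayToken(_, slice) => write!(out, "{}", slice),
    })
}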