From 3ba8f024577e52c51833cd34b07ad90d14cb6338 Mon Sep 17 00:00:00 2001 From: Klimperfix Date: Sat, 4 Oct 2025 12:00:11 +0200 Subject: macro-sani: Implemented new algorithm to replace macros. --- src/bibiman/sanitize/optimized_sanitize.rs | 61 +++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs index b3bf90d..8788d39 100644 --- a/src/bibiman/sanitize/optimized_sanitize.rs +++ b/src/bibiman/sanitize/optimized_sanitize.rs @@ -20,8 +20,13 @@ use std::collections::HashMap; use logos::Logos; -static LOOKUP: phf::Map<&'static str, &'static str> = phf_map! { - " " => " ", // str a forced space should substitute to. +static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = phf_map! { + r"\mkbibquote" => ("\"", Some("\"")), + r"\enquote*" => ("\'", Some("\'")), + r"\enquote" => ("\"", Some("\"")), + r"\hyphen" => ("-", None), + r"\textbf" => ("", Some("")), + r"\textit" => ("", Some("")), }; #[derive(Logos, Debug)] @@ -30,41 +35,59 @@ enum Token { OpenCurlyBracket, #[token("}")] ClosedCurlyBracket, - #[regex(r"\\\w+")] + #[regex(r"\\[\*\w]+")] LaTeXMacro, #[token(r"\ ")] ForcedSpace, } pub fn optimized_sanitize(input_text: &str) -> String { - let mut out: Vec<&str> = Vec::new(); + let mut out: Vec<&str> = Vec::with_capacity(input_text.chars().count()); let mut bracket_counter: u32 = 0; - let mut counter_actions: HashMap = HashMap::new(); + let mut bc_up: bool = false; + let mut counter_actions: HashMap = HashMap::new(); let mut lex = Token::lexer(input_text); while let Some(sometoken) = lex.next() { match sometoken { Ok(token) => match token { Token::ForcedSpace => { - out.push( - LOOKUP - .get(" ") - .expect("Something is wrong with the sanitization lookup table."), - ); + out.push(" "); + bc_up = false; } Token::OpenCurlyBracket => { - bracket_counter.saturating_add(1); - todo!(); + if bc_up { + bracket_counter = bracket_counter.saturating_add(1); + } else { + out.push("{") + } } Token::ClosedCurlyBracket => { - bracket_counter.saturating_sub(1); - todo!(); + if bracket_counter == 0 { + out.push("}") + } else { + match counter_actions.remove(&bracket_counter) { + None => out.push("}"), + Some(a) => out.push(a), + } + bracket_counter = bracket_counter - 1; + } } Token::LaTeXMacro => { - todo!() + let texmacro = lex.slice(); + if let Some(x) = LOOKUP.get(&texmacro) { + if let Some(end) = x.1 { + bc_up = true; + counter_actions.insert(bracket_counter + 1, end); + } + out.push(x.0); + } else { + out.push(texmacro) + } } }, Err(_) => { out.push(lex.slice()); + bc_up = false; } } } @@ -78,9 +101,11 @@ mod tests { #[test] fn check_sanitization() { let result = optimized_sanitize( - r"\mkbibquote{Intention} und \mkbibquote{Intentionen sind \hyphen\ bibquote.}", + r"\mkbibquote{Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote.}", ); - println!("{}", result); - panic!("Tatütata!"); + assert_eq!( + "\"Intention\" und \"Intentionen \"sind\" - bibquote.\"", + result + ) } } -- cgit v1.2.3