| author | Klimperfix | 2025-10-04 12:00:11 +0200 |
|---|---|---|
| committer | Klimperfix | 2025-10-04 12:16:29 +0200 |
| commit | 3ba8f024577e52c51833cd34b07ad90d14cb6338 (patch) | |
| tree | 017de5cb44f9e01ae39d450c7e7cf1b84a492631 | |
| parent | 624977bb9fd209b0c7c5f60a1332718de1d460d4 (diff) | |
macro-sani: Implemented new algorithm to replace macros.
| -rw-r--r-- | src/bibiman/sanitize/optimized_sanitize.rs | 61 |
1 file changed, 43 insertions(+), 18 deletions(-)
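
The core of the new algorithm is a lookup table that maps each supported macro to an opening replacement plus an optional closing replacement; when a macro with a closing part is followed by `{`, that closing text is remembered under the new brace depth and emitted once the matching `}` is reached. The standalone sketch below illustrates only that deferred-closing mechanism with a plain `HashMap` and `chars()` instead of the `phf` table and `logos` lexer used in the diff; all names such as `replace_macros`, `depth`, and `pending` are illustrative and not taken from the commit.

```rust
use std::collections::HashMap;

// Sketch only (not the committed code): each macro maps to an opening
// replacement plus an optional closing replacement. When a macro with a
// closing part is followed by "{", the closing text is stored under the new
// brace depth; the matching "}" then emits it instead of a literal brace.
fn replace_macros(input: &str, table: &HashMap<&str, (&str, Option<&str>)>) -> String {
    let mut out = String::new();
    let mut depth = 0u32;                                  // current "{" nesting level
    let mut pending: HashMap<u32, &str> = HashMap::new();  // depth -> closing text
    let mut expect_open = false;                           // last macro wants to capture a group
    let mut chars = input.chars().peekable();

    while let Some(c) = chars.next() {
        match c {
            '\\' => {
                // read the macro name (letters and '*')
                let mut name = String::from("\\");
                while matches!(chars.peek(), Some(ch) if ch.is_alphabetic() || *ch == '*') {
                    name.push(chars.next().unwrap());
                }
                if let Some((open, close)) = table.get(name.as_str()).copied() {
                    out.push_str(open);
                    if let Some(close) = close {
                        pending.insert(depth + 1, close);
                        expect_open = true;
                    }
                } else {
                    out.push_str(&name); // unknown macro: keep verbatim
                }
            }
            '{' if expect_open => {
                depth += 1;
                expect_open = false;
            }
            '}' if depth > 0 => {
                out.push_str(pending.remove(&depth).unwrap_or("}"));
                depth -= 1;
            }
            _ => {
                out.push(c);
                expect_open = false;
            }
        }
    }
    out
}

fn main() {
    let table = HashMap::from([(r"\mkbibquote", ("\"", Some("\"")))]);
    // Nested quoting, mirroring the test case in the diff below.
    let s = replace_macros(r"\mkbibquote{a \mkbibquote{b}}", &table);
    println!("{s}"); // "a "b""
}
```

The committed implementation in the diff below does the same bookkeeping with a `bracket_counter`, the `counter_actions` map, and the `bc_up` flag that marks whether the next `{` belongs to a macro argument.
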
diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
index b3bf90d..8788d39 100644
--- a/src/bibiman/sanitize/optimized_sanitize.rs
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -20,8 +20,13 @@
 use std::collections::HashMap;
 use logos::Logos;

-static LOOKUP: phf::Map<&'static str, &'static str> = phf_map! {
-    " " => " ", // str a forced space should substitute to.
+static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = phf_map! {
+    r"\mkbibquote" => ("\"", Some("\"")),
+    r"\enquote*" => ("\'", Some("\'")),
+    r"\enquote" => ("\"", Some("\"")),
+    r"\hyphen" => ("-", None),
+    r"\textbf" => ("", Some("")),
+    r"\textit" => ("", Some("")),
 };

 #[derive(Logos, Debug)]
@@ -30,41 +35,59 @@ enum Token {
     OpenCurlyBracket,
     #[token("}")]
     ClosedCurlyBracket,
-    #[regex(r"\\\w+")]
+    #[regex(r"\\[\*\w]+")]
     LaTeXMacro,
     #[token(r"\ ")]
     ForcedSpace,
 }

 pub fn optimized_sanitize(input_text: &str) -> String {
-    let mut out: Vec<&str> = Vec::new();
+    let mut out: Vec<&str> = Vec::with_capacity(input_text.chars().count());
     let mut bracket_counter: u32 = 0;
-    let mut counter_actions: HashMap<u32, String> = HashMap::new();
+    let mut bc_up: bool = false;
+    let mut counter_actions: HashMap<u32, &str> = HashMap::new();
     let mut lex = Token::lexer(input_text);

     while let Some(sometoken) = lex.next() {
         match sometoken {
             Ok(token) => match token {
                 Token::ForcedSpace => {
-                    out.push(
-                        LOOKUP
-                            .get(" ")
-                            .expect("Something is wrong with the sanitization lookup table."),
-                    );
+                    out.push(" ");
+                    bc_up = false;
                 }
                 Token::OpenCurlyBracket => {
-                    bracket_counter.saturating_add(1);
-                    todo!();
+                    if bc_up {
+                        bracket_counter = bracket_counter.saturating_add(1);
+                    } else {
+                        out.push("{")
+                    }
                 }
                 Token::ClosedCurlyBracket => {
-                    bracket_counter.saturating_sub(1);
-                    todo!();
+                    if bracket_counter == 0 {
+                        out.push("}")
+                    } else {
+                        match counter_actions.remove(&bracket_counter) {
+                            None => out.push("}"),
+                            Some(a) => out.push(a),
+                        }
+                        bracket_counter = bracket_counter - 1;
+                    }
                 }
                 Token::LaTeXMacro => {
-                    todo!()
+                    let texmacro = lex.slice();
+                    if let Some(x) = LOOKUP.get(&texmacro) {
+                        if let Some(end) = x.1 {
+                            bc_up = true;
+                            counter_actions.insert(bracket_counter + 1, end);
+                        }
+                        out.push(x.0);
+                    } else {
+                        out.push(texmacro)
+                    }
                 }
             },
             Err(_) => {
                 out.push(lex.slice());
+                bc_up = false;
             }
         }
     }
 }
@@ -78,9 +101,11 @@ mod tests {
     #[test]
     fn check_sanitization() {
         let result = optimized_sanitize(
-            r"\mkbibquote{Intention} und \mkbibquote{Intentionen sind \hyphen\ bibquote.}",
+            r"\mkbibquote{Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote.}",
         );
-        println!("{}", result);
-        panic!("Tatütata!");
+        assert_eq!(
+            "\"Intention\" und \"Intentionen \"sind\" - bibquote.\"",
+            result
+        )
     }
 }
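
For a sense of how the other table entries combine, here is a hypothetical extra test case. It is not part of this commit; it assumes it would sit next to `check_sanitization` in the same `tests` module with `optimized_sanitize` in scope, and the expected string follows from reading the new loop and the LOOKUP entries above.

```rust
#[test]
fn check_enquote_and_textbf() {
    // \enquote* maps to single quotes, \textbf to empty strings (so its braces
    // are dropped), and \hyphen to "-" with no closing counterpart.
    let result = optimized_sanitize(r"\enquote*{Begriff} und \textbf{fett} \hyphen\ Test");
    // Expected output, assuming the loop handles these entries like \mkbibquote:
    assert_eq!("'Begriff' und fett - Test", result);
}
```
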
