aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Klimperfix 2025-10-04 12:00:11 +0200
committer Klimperfix 2025-10-04 12:16:29 +0200
commit 3ba8f024577e52c51833cd34b07ad90d14cb6338 (patch)
tree 017de5cb44f9e01ae39d450c7e7cf1b84a492631
parent 624977bb9fd209b0c7c5f60a1332718de1d460d4 (diff)
download bibiman-3ba8f024577e52c51833cd34b07ad90d14cb6338.tar.gz
bibiman-3ba8f024577e52c51833cd34b07ad90d14cb6338.zip
macro-sani: Implemented new algorithm to replace macros.
-rw-r--r-- src/bibiman/sanitize/optimized_sanitize.rs 61
1 files changed, 43 insertions, 18 deletions
diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
index b3bf90d..8788d39 100644
--- a/src/bibiman/sanitize/optimized_sanitize.rs
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -20,8 +20,13 @@ use std::collections::HashMap;
use logos::Logos;
-static LOOKUP: phf::Map<&'static str, &'static str> = phf_map! {
- " " => " ", // str a forced space should substitute to.
+static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = phf_map! {
+ r"\mkbibquote" => ("\"", Some("\"")),
+ r"\enquote*" => ("\'", Some("\'")),
+ r"\enquote" => ("\"", Some("\"")),
+ r"\hyphen" => ("-", None),
+ r"\textbf" => ("", Some("")),
+ r"\textit" => ("", Some("")),
};
#[derive(Logos, Debug)]
@@ -30,41 +35,59 @@ enum Token {
OpenCurlyBracket,
#[token("}")]
ClosedCurlyBracket,
- #[regex(r"\\\w+")]
+ #[regex(r"\\[\*\w]+")]
LaTeXMacro,
#[token(r"\ ")]
ForcedSpace,
}
pub fn optimized_sanitize(input_text: &str) -> String {
- let mut out: Vec<&str> = Vec::new();
+ let mut out: Vec<&str> = Vec::with_capacity(input_text.chars().count());
let mut bracket_counter: u32 = 0;
- let mut counter_actions: HashMap<u32, String> = HashMap::new();
+ let mut bc_up: bool = false;
+ let mut counter_actions: HashMap<u32, &str> = HashMap::new();
let mut lex = Token::lexer(input_text);
while let Some(sometoken) = lex.next() {
match sometoken {
Ok(token) => match token {
Token::ForcedSpace => {
- out.push(
- LOOKUP
- .get(" ")
- .expect("Something is wrong with the sanitization lookup table."),
- );
+ out.push(" ");
+ bc_up = false;
}
Token::OpenCurlyBracket => {
- bracket_counter.saturating_add(1);
- todo!();
+ if bc_up {
+ bracket_counter = bracket_counter.saturating_add(1);
+ } else {
+ out.push("{")
+ }
}
Token::ClosedCurlyBracket => {
- bracket_counter.saturating_sub(1);
- todo!();
+ if bracket_counter == 0 {
+ out.push("}")
+ } else {
+ match counter_actions.remove(&bracket_counter) {
+ None => out.push("}"),
+ Some(a) => out.push(a),
+ }
+ bracket_counter = bracket_counter - 1;
+ }
}
Token::LaTeXMacro => {
- todo!()
+ let texmacro = lex.slice();
+ if let Some(x) = LOOKUP.get(&texmacro) {
+ if let Some(end) = x.1 {
+ bc_up = true;
+ counter_actions.insert(bracket_counter + 1, end);
+ }
+ out.push(x.0);
+ } else {
+ out.push(texmacro)
+ }
}
},
Err(_) => {
out.push(lex.slice());
+ bc_up = false;
}
}
}
@@ -78,9 +101,11 @@ mod tests {
#[test]
fn check_sanitization() {
let result = optimized_sanitize(
- r"\mkbibquote{Intention} und \mkbibquote{Intentionen sind \hyphen\ bibquote.}",
+ r"\mkbibquote{Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote.}",
);
- println!("{}", result);
- panic!("Tatütata!");
+ assert_eq!(
+ "\"Intention\" und \"Intentionen \"sind\" - bibquote.\"",
+ result
+ )
}
}