From 7350c7382bda85618c3dae1d74cc8cbe7ddd4b9d Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Tue, 30 Sep 2025 21:49:31 +0200
Subject: Implemented basic sanitizing.

The default `regex` crate does not support the regex features required here
(e.g. backreferences), so I'm using the `fancy-regex` crate, which does.
---
 src/bibiman.rs          |  3 ++
 src/bibiman/sanitize.rs | 73 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 src/bibiman/sanitize.rs

(limited to 'src')
diff --git a/src/bibiman.rs b/src/bibiman.rs
index 6d21f8c..c423ce1 100644
--- a/src/bibiman.rs
+++ b/src/bibiman.rs
@@ -44,6 +44,9 @@ pub mod entries;
 pub mod keywords;
 pub mod search;
 
+/// Module with function to sanitize text with LaTeX Macros into readable unicode text.
+pub mod sanitize;
+
 // Areas in which actions are possible
 #[derive(Debug)]
 pub enum CurrentArea {
diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs
new file mode 100644
index 0000000..aaf81ad
--- /dev/null
+++ b/src/bibiman/sanitize.rs
@@ -0,0 +1,73 @@
+// bibiman - a TUI for managing BibLaTeX databases
+// Copyright (C) 2025  lukeflo
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+/////
+
+use fancy_regex::Regex;
+use unicodeit::replace as unicode_replace;
+
+/// Sanitizing process rules as regex cmds.
+///
+/// Only macros that are not already covered by unicodeit should be processed in this way.
+///
+// Regex to capture content between brackets: `(\{(?:[^{}]++|(\1))*+\})`
+// Alternative without capturing the outer brackets: `\{((?:(\{(?:[^}{]|(\1))*+\})|[^{])*?)\}`
+//
+const SANITIZE_REGEX: &[(&str, &str)] = &[
+    (
+        r"\\mkbibquote\{((?:(\{(?:[^}{]|(\1))*+\})|[^{])*?)\}",
+        "\"${1}\"",
+    ),
+    (r"\\hyphen", "-"),
+];
+
+/// Sanitize one String with LaTeX Macros into a more readable one without.
+///
+/// If one is going to mass-sanitize strings, one should use the [`sanitize`]
+/// function for performance reasons instead, to process multiple strings at once.
+///
+/// This is just a shortcut for the sanitize function.
+pub fn sanitize_one(input_text: &str) -> String {
+    // This does not panic, the sanitize function always returns
+    // as many elements in the returned list as it get's elements
+    // in the input vector.
+    sanitize(vec![input_text]).get(0).unwrap().to_string()
+}
+
+/// Sanitize multiple Strings with LaTeX Macros into more readable unicode equivalents.
+///
+/// This function does always return the same amount of Strings as it gets in the input list.
+pub fn sanitize(input_text: Vec<&str>) -> Vec<String> {
+    let mut result: Vec<String> = input_text.into_iter().map(|s| s.to_string()).collect();
+    let mut regex: Vec<(Regex, &str)> = Vec::with_capacity(SANITIZE_REGEX.len());
+    // build regex
+    for (search, replace) in SANITIZE_REGEX {
+        regex.push((Regex::new(search).unwrap(), replace));
+    }
+
+    // process strings
+    let result_len = result.len();
+    for (re, replace) in regex {
+        for i in 0..result_len {
+            result[i] = re.replace_all(&result[i], replace).to_string();
+        }
+    }
+    for i in 0..result_len {
+        result[i] = unicode_replace(&result[i]);
+    }
+
+    // return result
+    result
+}
-- 
cgit v1.2.3


From dfb7edde13ca39af3e23b80e40272e02aa093919 Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Fri, 3 Oct 2025 12:37:16 +0200
Subject: Sanitization hooked into bibiman.

---
 src/bibiman/bibisetup.rs | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs
index 3bcb717..8466169 100644
--- a/src/bibiman/bibisetup.rs
+++ b/src/bibiman/bibisetup.rs
@@ -26,6 +26,7 @@ use std::{fs, path::PathBuf};
 use walkdir::WalkDir;
 
 use crate::app;
+use crate::bibiman::sanitize::sanitize_one;
 use crate::cliargs::{self};
 use crate::config::BibiConfig;
 
@@ -294,11 +295,28 @@ impl BibiSetup {
                 let filepaths: (Option<Vec<OsString>>, bool) =
                     { Self::get_filepath(k, bibliography, &mut pdf_files) };
 
+                // bibiman will sanitize some fields at this point,
+                // this may cause longer startup-load-times.
+                //
+                // It may be better to sanitize them somewhere else, so bibiman
+                // does not loose the original text-information including the
+                // LaTeX macros present in the bibfile. From here on, they will be
+                // gone.
+                //
+                // The following fields are going to be sanitized:
+                //
+                // - title
+                // - subtitle
+                // - abstract_text
+                //
+                // TODO: Once the final decision to implement the sanitization at
+                // this point, one could write a constructor for the BibiData struct
+                // which handles the sanitization.
                 BibiData {
                     id: i as u32,
                     authors: Self::get_authors(k, bibliography),
                     short_author: String::new(),
-                    title: Self::get_title(k, bibliography),
+                    title: sanitize_one(&Self::get_title(k, bibliography)),
                     year: Self::get_year(k, bibliography),
                     custom_field: (
                         cfg.general.custom_column.clone(),
@@ -306,11 +324,14 @@ impl BibiSetup {
                     ),
                     keywords: Self::get_keywords(k, bibliography),
                     citekey: k.to_owned(),
-                    abstract_text: Self::get_abstract(k, bibliography),
+                    abstract_text: sanitize_one(&Self::get_abstract(k, bibliography)),
                     doi_url: Self::get_weblink(k, bibliography),
                     filepath: filepaths.0,
                     file_field: filepaths.1,
-                    subtitle: Self::get_subtitle(k, bibliography),
+                    subtitle: match Self::get_subtitle(k, bibliography) {
+                        None => None,
+                        Some(x) => Some(sanitize_one(&x)),
+                    },
                     notes: if note_files.is_some() {
                         Self::get_notepath(k, &mut note_files, &ext)
                     } else {
-- 
cgit v1.2.3


From 26befd38aedbfdd278c3096644baf69e4a1fb051 Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Fri, 3 Oct 2025 16:56:30 +0200
Subject: Now storing the sanitized data separately, keeping the original.

---
 src/bibiman/bibisetup.rs | 154 ++++++++++++++++++++++++++---------------------
 src/bibiman/entries.rs   |   8 ++-
 src/bibiman/sanitize.rs  |  96 +++++++++++++++++++++++------
 src/bibiman/search.rs    |   4 +-
 4 files changed, 175 insertions(+), 87 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs
index 8466169..48046e9 100644
--- a/src/bibiman/bibisetup.rs
+++ b/src/bibiman/bibisetup.rs
@@ -26,7 +26,7 @@ use std::{fs, path::PathBuf};
 use walkdir::WalkDir;
 
 use crate::app;
-use crate::bibiman::sanitize::sanitize_one;
+use crate::bibiman::sanitize::{mass_sanitize, sanitize_one_bibidata};
 use crate::cliargs::{self};
 use crate::config::BibiConfig;
 
@@ -77,6 +77,18 @@ pub struct BibiData {
     pub subtitle: Option<String>,
     pub notes: Option<Vec<OsString>>,
     pub symbols: [Option<String>; 3],
+    /// This field should be set to None when initially creating a BibiData instance.
+    /// It then can be generated from the constructed BibiData Object using
+    /// `BibiData::gen_sanitized()`
+    pub sanitized_bibi_data: Option<SanitizedBibiData>,
+}
+
+/// Struct that holds sanitized bibidata data.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct SanitizedBibiData {
+    pub title: String,
+    pub subtitle: Option<String>,
+    pub abstract_text: String,
 }
 
 #[derive(Debug, Clone, PartialEq)]
@@ -120,22 +132,41 @@ impl BibiData {
         //     self.pubtype(),
         //     &self.symbols,
         // ]
-
-        BibiRow {
-            authors: {
-                if self.short_author.is_empty() {
-                    self.authors()
-                } else {
-                    &self.short_author
-                }
-            },
-            title: self.title(),
-            year: self.year(),
-            custom_field_value: self.custom_field_value(),
-            symbols: &self.symbols,
+        let author_ref = if self.short_author.is_empty() {
+            self.authors()
+        } else {
+            &self.short_author
+        };
+        if let Some(sanidata) = &self.sanitized_bibi_data {
+            BibiRow {
+                authors: author_ref,
+                title: &sanidata.title,
+                year: self.year(),
+                custom_field_value: self.custom_field_value(),
+                symbols: &self.symbols,
+            }
+        } else {
+            BibiRow {
+                authors: author_ref,
+                title: self.title(),
+                year: self.year(),
+                custom_field_value: self.custom_field_value(),
+                symbols: &self.symbols,
+            }
         }
     }
 
+    /// Generates the SanitizedBibiData for the BibiData.
+    ///
+    /// Consumes self and returns a new BibiData struct.
+    ///
+    /// If multiple SanitizedBibiData are to be generated,
+    /// one should use the [`mass_sanitize`] function instead.
+    pub fn gen_sanitized(mut self) -> Self {
+        self.sanitized_bibi_data = Some(sanitize_one_bibidata(&self));
+        self
+    }
+
     pub fn entry_id(&self) -> &u32 {
         &self.id
     }
@@ -288,59 +319,48 @@ impl BibiSetup {
             } else {
                 None
             };
-        citekeys
-            .iter()
-            .enumerate()
-            .map(|(i, k)| {
-                let filepaths: (Option<Vec<OsString>>, bool) =
-                    { Self::get_filepath(k, bibliography, &mut pdf_files) };
-
-                // bibiman will sanitize some fields at this point,
-                // this may cause longer startup-load-times.
-                //
-                // It may be better to sanitize them somewhere else, so bibiman
-                // does not loose the original text-information including the
-                // LaTeX macros present in the bibfile. From here on, they will be
-                // gone.
-                //
-                // The following fields are going to be sanitized:
-                //
-                // - title
-                // - subtitle
-                // - abstract_text
-                //
-                // TODO: Once the final decision to implement the sanitization at
-                // this point, one could write a constructor for the BibiData struct
-                // which handles the sanitization.
-                BibiData {
-                    id: i as u32,
-                    authors: Self::get_authors(k, bibliography),
-                    short_author: String::new(),
-                    title: sanitize_one(&Self::get_title(k, bibliography)),
-                    year: Self::get_year(k, bibliography),
-                    custom_field: (
-                        cfg.general.custom_column.clone(),
-                        Self::get_custom_field(k, bibliography, &cfg.general.custom_column),
-                    ),
-                    keywords: Self::get_keywords(k, bibliography),
-                    citekey: k.to_owned(),
-                    abstract_text: sanitize_one(&Self::get_abstract(k, bibliography)),
-                    doi_url: Self::get_weblink(k, bibliography),
-                    filepath: filepaths.0,
-                    file_field: filepaths.1,
-                    subtitle: match Self::get_subtitle(k, bibliography) {
-                        None => None,
-                        Some(x) => Some(sanitize_one(&x)),
-                    },
-                    notes: if note_files.is_some() {
-                        Self::get_notepath(k, &mut note_files, &ext)
-                    } else {
-                        None
-                    },
-                    symbols: [None, None, None],
-                }
-            })
-            .collect()
+        //
+        //
+        // bibiman will sanitize some fields at this point,
+        // this may cause longer startup-load-times.
+        //
+        //
+        mass_sanitize(
+            citekeys
+                .iter()
+                .enumerate()
+                .map(|(i, k)| {
+                    let filepaths: (Option<Vec<OsString>>, bool) =
+                        { Self::get_filepath(k, bibliography, &mut pdf_files) };
+
+                    BibiData {
+                        id: i as u32,
+                        authors: Self::get_authors(k, bibliography),
+                        short_author: String::new(),
+                        title: Self::get_title(k, bibliography),
+                        year: Self::get_year(k, bibliography),
+                        custom_field: (
+                            cfg.general.custom_column.clone(),
+                            Self::get_custom_field(k, bibliography, &cfg.general.custom_column),
+                        ),
+                        keywords: Self::get_keywords(k, bibliography),
+                        citekey: k.to_owned(),
+                        abstract_text: Self::get_abstract(k, bibliography),
+                        doi_url: Self::get_weblink(k, bibliography),
+                        filepath: filepaths.0,
+                        file_field: filepaths.1,
+                        subtitle: Self::get_subtitle(k, bibliography),
+                        notes: if note_files.is_some() {
+                            Self::get_notepath(k, &mut note_files, &ext)
+                        } else {
+                            None
+                        },
+                        symbols: [None, None, None],
+                        sanitized_bibi_data: None,
+                    }
+                })
+                .collect(),
+        )
     }
 
     // get list of citekeys from the given bibfile
diff --git a/src/bibiman/entries.rs b/src/bibiman/entries.rs
index db6d6bf..0b35a8b 100644
--- a/src/bibiman/entries.rs
+++ b/src/bibiman/entries.rs
@@ -174,7 +174,9 @@ mod tests {
             subtitle: None,
             notes: None,
             symbols: [None, None, None],
-        };
+            sanitized_bibi_data: None,
+        }
+        .gen_sanitized();
 
         let entry_vec = BibiData::ref_vec(&mut entry, &cfg);
 
@@ -194,7 +196,9 @@ mod tests {
             subtitle: None,
             notes: None,
             symbols: [None, None, None],
-        };
+            sanitized_bibi_data: None,
+        }
+        .gen_sanitized();
 
         let entry_vec_editors = BibiData::ref_vec(&mut entry_editors, &cfg);
 
diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs
index aaf81ad..614ed11 100644
--- a/src/bibiman/sanitize.rs
+++ b/src/bibiman/sanitize.rs
@@ -18,6 +18,8 @@
 use fancy_regex::Regex;
 use unicodeit::replace as unicode_replace;
 
+use crate::bibiman::bibisetup::{BibiData, SanitizedBibiData};
+
 /// Sanitizing process rules as regex cmds.
 ///
 /// Only macros that are not already covered by unicodeit should be processed in this way.
@@ -33,6 +35,71 @@ const SANITIZE_REGEX: &[(&str, &str)] = &[
     (r"\\hyphen", "-"),
 ];
 
+/// Function to build the sanitization regex vector:
+fn regex_vector() -> Vec<(Regex, &'static str)> {
+    let mut regex: Vec<(Regex, &str)> = Vec::with_capacity(SANITIZE_REGEX.len());
+    // build regex
+    for (search, replace) in SANITIZE_REGEX {
+        regex.push((Regex::new(search).unwrap(), replace));
+    }
+    regex
+}
+
+fn optimized_sanitize(input_text: Vec<&str>, regex: &Vec<(Regex, &str)>) -> Vec<String> {
+    let mut result: Vec<String> = input_text.into_iter().map(|s| s.to_string()).collect();
+
+    // process strings
+    let result_len = result.len();
+    for (re, replace) in regex {
+        for i in 0..result_len {
+            result[i] = re.replace_all(&result[i], *replace).to_string();
+        }
+    }
+    for i in 0..result_len {
+        result[i] = unicode_replace(&result[i]);
+    }
+
+    // return result
+    result
+}
+
+/// Helper macro to sanitize bibidata structs.
+/// Here lives the code that generates SanitizedBibiData
+/// structs from BibiData structs.
+macro_rules! optimized_sanitize_bibidata {
+    ($bibidata:expr, $regex:expr) => {
+        match &$bibidata.subtitle {
+            None => {
+                let sanitized_data =
+                    optimized_sanitize(vec![&$bibidata.title, &$bibidata.abstract_text], &$regex);
+                SanitizedBibiData {
+                    title: sanitized_data[0].clone(),
+                    subtitle: None,
+                    abstract_text: sanitized_data[1].clone(),
+                }
+            }
+            Some(subtitle) => {
+                let sanitized_data = optimized_sanitize(
+                    vec![&$bibidata.title, subtitle, &$bibidata.abstract_text],
+                    &$regex,
+                );
+                SanitizedBibiData {
+                    title: sanitized_data[0].clone(),
+                    subtitle: Some(sanitized_data[1].clone()),
+                    abstract_text: sanitized_data[2].clone(),
+                }
+            }
+        }
+    };
+}
+
+/// Sanitize one BibiData and return a SanitizedBibiData struct.
+/// This function does ignore any existing sanitization of the bibidata struct.
+pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData {
+    let regex = regex_vector();
+    optimized_sanitize_bibidata!(bibidata, regex)
+}
+
 /// Sanitize one String with LaTeX Macros into a more readable one without.
 ///
 /// If one is going to mass-sanitize strings, one should use the [`sanitize`]
@@ -50,24 +117,19 @@ pub fn sanitize_one(input_text: &str) -> String {
 ///
 /// This function does always return the same amount of Strings as it gets in the input list.
 pub fn sanitize(input_text: Vec<&str>) -> Vec<String> {
-    let mut result: Vec<String> = input_text.into_iter().map(|s| s.to_string()).collect();
-    let mut regex: Vec<(Regex, &str)> = Vec::with_capacity(SANITIZE_REGEX.len());
-    // build regex
-    for (search, replace) in SANITIZE_REGEX {
-        regex.push((Regex::new(search).unwrap(), replace));
-    }
+    optimized_sanitize(input_text, &regex_vector())
+}
 
-    // process strings
-    let result_len = result.len();
-    for (re, replace) in regex {
-        for i in 0..result_len {
-            result[i] = re.replace_all(&result[i], replace).to_string();
-        }
-    }
-    for i in 0..result_len {
-        result[i] = unicode_replace(&result[i]);
-    }
+/// Sanitize a whole `Vec<BibiData>`, returning a new sanitized one.
+pub fn mass_sanitize(bibidata: Vec<BibiData>) -> Vec<BibiData> {
+    let regex: Vec<(Regex, &str)> = regex_vector();
 
-    // return result
+    let mut result: Vec<BibiData> = Vec::with_capacity(bibidata.len());
+    for entry in bibidata {
+        result.push(BibiData {
+            sanitized_bibi_data: Some(optimized_sanitize_bibidata!(entry, regex)),
+            ..entry
+        });
+    }
     result
 }
diff --git a/src/bibiman/search.rs b/src/bibiman/search.rs
index e0c5f17..2156634 100644
--- a/src/bibiman/search.rs
+++ b/src/bibiman/search.rs
@@ -141,7 +141,9 @@ mod tests {
             subtitle: None,
             notes: None,
             symbols: [None, None, None],
-        };
+            sanitized_bibi_data: None,
+        }
+        .gen_sanitized();
 
         let joined_vec = BibiSearch::convert_to_string(&bibvec);
 
-- 
cgit v1.2.3


From 161fc7010cb863e1af534ce1d173136401816a32 Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Fri, 3 Oct 2025 18:19:25 +0200
Subject: Removed unused sanitization functions.

---
 src/bibiman/sanitize.rs | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs
index 614ed11..823b91c 100644
--- a/src/bibiman/sanitize.rs
+++ b/src/bibiman/sanitize.rs
@@ -100,26 +100,6 @@ pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData {
     optimized_sanitize_bibidata!(bibidata, regex)
 }
 
-/// Sanitize one String with LaTeX Macros into a more readable one without.
-///
-/// If one is going to mass-sanitize strings, one should use the [`sanitize`]
-/// function for performance reasons instead, to process multiple strings at once.
-///
-/// This is just a shortcut for the sanitize function.
-pub fn sanitize_one(input_text: &str) -> String {
-    // This does not panic, the sanitize function always returns
-    // as many elements in the returned list as it get's elements
-    // in the input vector.
-    sanitize(vec![input_text]).get(0).unwrap().to_string()
-}
-
-/// Sanitize multiple Strings with LaTeX Macros into more readable unicode equivalents.
-///
-/// This function does always return the same amount of Strings as it gets in the input list.
-pub fn sanitize(input_text: Vec<&str>) -> Vec<String> {
-    optimized_sanitize(input_text, &regex_vector())
-}
-
 /// Sanitize a whole `Vec<BibiData>`, returning a new sanitized one.
 pub fn mass_sanitize(bibidata: Vec<BibiData>) -> Vec<BibiData> {
     let regex: Vec<(Regex, &str)> = regex_vector();
-- 
cgit v1.2.3


From 624977bb9fd209b0c7c5f60a1332718de1d460d4 Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Fri, 3 Oct 2025 22:57:37 +0200
Subject: macro-sani: started impl new algorithm

---
 Cargo.lock                                 | 146 +++++++++++++++++++++--------
 Cargo.toml                                 |   4 +-
 src/bibiman/bibisetup.rs                   |  76 +++++++--------
 src/bibiman/sanitize.rs                    |  94 +++----------------
 src/bibiman/sanitize/optimized_sanitize.rs |  86 +++++++++++++++++
 5 files changed, 241 insertions(+), 165 deletions(-)
 create mode 100644 src/bibiman/sanitize/optimized_sanitize.rs

(limited to 'src')

diff --git a/Cargo.lock b/Cargo.lock
index 535b929..22a5a48 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -89,6 +89,12 @@ version = "0.22.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
 
+[[package]]
+name = "beef"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
+
 [[package]]
 name = "bibiman"
 version = "0.14.1"
@@ -99,13 +105,14 @@ dependencies = [
  "crossterm",
  "dirs",
  "editor-command",
- "fancy-regex",
  "figment",
  "futures",
  "itertools",
  "lexopt",
+ "logos",
  "nucleo-matcher",
  "owo-colors",
+ "phf",
  "rand",
  "ratatui",
  "regex",
@@ -114,7 +121,6 @@ dependencies = [
  "tokio",
  "tokio-util",
  "tui-input",
- "unicodeit",
  "ureq",
  "walkdir",
 ]
@@ -132,21 +138,6 @@ dependencies = [
  "unscanny",
 ]
 
-[[package]]
-name = "bit-set"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
-dependencies = [
- "bit-vec",
-]
-
-[[package]]
-name = "bit-vec"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
-
 [[package]]
 name = "bitflags"
 version = "1.3.2"
@@ -426,17 +417,6 @@ dependencies = [
  "once_cell",
 ]
 
-[[package]]
-name = "fancy-regex"
-version = "0.16.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "998b056554fbe42e03ae0e152895cd1a7e1002aec800fdc6635d20270260c46f"
-dependencies = [
- "bit-set",
- "regex-automata",
- "regex-syntax",
-]
-
 [[package]]
 name = "fastrand"
 version = "2.3.0"
@@ -893,6 +873,40 @@ version = "0.4.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
 
+[[package]]
+name = "logos"
+version = "0.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff472f899b4ec2d99161c51f60ff7075eeb3097069a36050d8037a6325eb8154"
+dependencies = [
+ "logos-derive",
+]
+
+[[package]]
+name = "logos-codegen"
+version = "0.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "192a3a2b90b0c05b27a0b2c43eecdb7c415e29243acc3f89cc8247a5b693045c"
+dependencies = [
+ "beef",
+ "fnv",
+ "lazy_static",
+ "proc-macro2",
+ "quote",
+ "regex-syntax",
+ "rustc_version",
+ "syn",
+]
+
+[[package]]
+name = "logos-derive"
+version = "0.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "605d9697bcd5ef3a42d38efc51541aa3d6a4a25f7ab6d1ed0da5ac632a26b470"
+dependencies = [
+ "logos-codegen",
+]
+
 [[package]]
 name = "lru"
 version = "0.12.5"
@@ -1126,6 +1140,49 @@ dependencies = [
  "indexmap",
 ]
 
+[[package]]
+name = "phf"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
+dependencies = [
+ "phf_macros",
+ "phf_shared",
+ "serde",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737"
+dependencies = [
+ "fastrand",
+ "phf_shared",
+]
+
+[[package]]
+name = "phf_macros"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef"
+dependencies = [
+ "phf_generator",
+ "phf_shared",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266"
+dependencies = [
+ "siphasher",
+]
+
 [[package]]
 name = "pin-project-lite"
 version = "0.2.16"
@@ -1329,6 +1386,15 @@ version = "0.1.25"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
 
+[[package]]
+name = "rustc_version"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
+dependencies = [
+ "semver",
+]
+
 [[package]]
 name = "rustix"
 version = "0.38.44"
@@ -1417,6 +1483,12 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
+[[package]]
+name = "semver"
+version = "1.0.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2"
+
 [[package]]
 name = "serde"
 version = "1.0.219"
@@ -1503,6 +1575,12 @@ version = "0.3.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
 
+[[package]]
+name = "siphasher"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
+
 [[package]]
 name = "slab"
 version = "0.4.10"
@@ -1891,18 +1969,6 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
 
-[[package]]
-name = "unicodeit"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1069c222ea63347e2e59763aa12d32c9c6a4e595931c7724a769f6a75bfbc553"
-dependencies = [
- "aho-corasick",
- "cfg-if",
- "memchr",
- "regex",
-]
-
 [[package]]
 name = "unscanny"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 2d596de..a01a7e7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -39,5 +39,5 @@ ureq = "2.12.1"
 serde = { version = "1.0.217", features = ["serde_derive"] }
 figment = { version = "0.10.19", features = [ "toml", "test" ]}
 owo-colors = "4.2.2"
-unicodeit = { version = "0.2.0", features = ["naive-impl"] }
-fancy-regex = "0.16.2"
+logos = "0.15.1"
+phf = { version = "0.13.1", features = ["macros"] }
diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs
index 48046e9..37b0b01 100644
--- a/src/bibiman/bibisetup.rs
+++ b/src/bibiman/bibisetup.rs
@@ -26,7 +26,7 @@ use std::{fs, path::PathBuf};
 use walkdir::WalkDir;
 
 use crate::app;
-use crate::bibiman::sanitize::{mass_sanitize, sanitize_one_bibidata};
+use crate::bibiman::sanitize::sanitize_one_bibidata;
 use crate::cliargs::{self};
 use crate::config::BibiConfig;
 
@@ -159,9 +159,6 @@ impl BibiData {
     /// Generates the SanitizedBibiData for the BibiData.
     ///
     /// Consumes self and returns a new BibiData struct.
-    ///
-    /// If multiple SanitizedBibiData are to be generated,
-    /// one should use the [`mass_sanitize`] function instead.
     pub fn gen_sanitized(mut self) -> Self {
         self.sanitized_bibi_data = Some(sanitize_one_bibidata(&self));
         self
@@ -325,42 +322,41 @@ impl BibiSetup {
         // this may cause longer startup-load-times.
         //
         //
-        mass_sanitize(
-            citekeys
-                .iter()
-                .enumerate()
-                .map(|(i, k)| {
-                    let filepaths: (Option<Vec<OsString>>, bool) =
-                        { Self::get_filepath(k, bibliography, &mut pdf_files) };
-
-                    BibiData {
-                        id: i as u32,
-                        authors: Self::get_authors(k, bibliography),
-                        short_author: String::new(),
-                        title: Self::get_title(k, bibliography),
-                        year: Self::get_year(k, bibliography),
-                        custom_field: (
-                            cfg.general.custom_column.clone(),
-                            Self::get_custom_field(k, bibliography, &cfg.general.custom_column),
-                        ),
-                        keywords: Self::get_keywords(k, bibliography),
-                        citekey: k.to_owned(),
-                        abstract_text: Self::get_abstract(k, bibliography),
-                        doi_url: Self::get_weblink(k, bibliography),
-                        filepath: filepaths.0,
-                        file_field: filepaths.1,
-                        subtitle: Self::get_subtitle(k, bibliography),
-                        notes: if note_files.is_some() {
-                            Self::get_notepath(k, &mut note_files, &ext)
-                        } else {
-                            None
-                        },
-                        symbols: [None, None, None],
-                        sanitized_bibi_data: None,
-                    }
-                })
-                .collect(),
-        )
+        citekeys
+            .iter()
+            .enumerate()
+            .map(|(i, k)| {
+                let filepaths: (Option<Vec<OsString>>, bool) =
+                    { Self::get_filepath(k, bibliography, &mut pdf_files) };
+
+                BibiData {
+                    id: i as u32,
+                    authors: Self::get_authors(k, bibliography),
+                    short_author: String::new(),
+                    title: Self::get_title(k, bibliography),
+                    year: Self::get_year(k, bibliography),
+                    custom_field: (
+                        cfg.general.custom_column.clone(),
+                        Self::get_custom_field(k, bibliography, &cfg.general.custom_column),
+                    ),
+                    keywords: Self::get_keywords(k, bibliography),
+                    citekey: k.to_owned(),
+                    abstract_text: Self::get_abstract(k, bibliography),
+                    doi_url: Self::get_weblink(k, bibliography),
+                    filepath: filepaths.0,
+                    file_field: filepaths.1,
+                    subtitle: Self::get_subtitle(k, bibliography),
+                    notes: if note_files.is_some() {
+                        Self::get_notepath(k, &mut note_files, &ext)
+                    } else {
+                        None
+                    },
+                    symbols: [None, None, None],
+                    sanitized_bibi_data: None,
+                }
+                .gen_sanitized()
+            })
+            .collect()
     }
 
     // get list of citekeys from the given bibfile
diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs
index 823b91c..9ccf4c4 100644
--- a/src/bibiman/sanitize.rs
+++ b/src/bibiman/sanitize.rs
@@ -15,80 +15,23 @@
 // along with this program.  If not, see <https://www.gnu.org/licenses/>.
 /////
 
-use fancy_regex::Regex;
-use unicodeit::replace as unicode_replace;
-
 use crate::bibiman::bibisetup::{BibiData, SanitizedBibiData};
 
-/// Sanitizing process rules as regex cmds.
-///
-/// Only macros that are not already covered by unicodeit should be processed in this way.
-///
-// Regex to capture content between brackets: `(\{(?:[^{}]++|(\1))*+\})`
-// Alternative without capturing the outer brackets: `\{((?:(\{(?:[^}{]|(\1))*+\})|[^{])*?)\}`
-//
-const SANITIZE_REGEX: &[(&str, &str)] = &[
-    (
-        r"\\mkbibquote\{((?:(\{(?:[^}{]|(\1))*+\})|[^{])*?)\}",
-        "\"${1}\"",
-    ),
-    (r"\\hyphen", "-"),
-];
-
-/// Function to build the sanitization regex vector:
-fn regex_vector() -> Vec<(Regex, &'static str)> {
-    let mut regex: Vec<(Regex, &str)> = Vec::with_capacity(SANITIZE_REGEX.len());
-    // build regex
-    for (search, replace) in SANITIZE_REGEX {
-        regex.push((Regex::new(search).unwrap(), replace));
-    }
-    regex
-}
-
-fn optimized_sanitize(input_text: Vec<&str>, regex: &Vec<(Regex, &str)>) -> Vec<String> {
-    let mut result: Vec<String> = input_text.into_iter().map(|s| s.to_string()).collect();
-
-    // process strings
-    let result_len = result.len();
-    for (re, replace) in regex {
-        for i in 0..result_len {
-            result[i] = re.replace_all(&result[i], *replace).to_string();
-        }
-    }
-    for i in 0..result_len {
-        result[i] = unicode_replace(&result[i]);
-    }
-
-    // return result
-    result
-}
+mod optimized_sanitize;
+use optimized_sanitize::optimized_sanitize;
 
 /// Helper macro to sanitize bibidata structs.
 /// Here lives the code that generates SanitizedBibiData
 /// structs from BibiData structs.
 macro_rules! optimized_sanitize_bibidata {
-    ($bibidata:expr, $regex:expr) => {
-        match &$bibidata.subtitle {
-            None => {
-                let sanitized_data =
-                    optimized_sanitize(vec![&$bibidata.title, &$bibidata.abstract_text], &$regex);
-                SanitizedBibiData {
-                    title: sanitized_data[0].clone(),
-                    subtitle: None,
-                    abstract_text: sanitized_data[1].clone(),
-                }
-            }
-            Some(subtitle) => {
-                let sanitized_data = optimized_sanitize(
-                    vec![&$bibidata.title, subtitle, &$bibidata.abstract_text],
-                    &$regex,
-                );
-                SanitizedBibiData {
-                    title: sanitized_data[0].clone(),
-                    subtitle: Some(sanitized_data[1].clone()),
-                    abstract_text: sanitized_data[2].clone(),
-                }
-            }
+    ($bibidata:expr) => {
+        SanitizedBibiData {
+            title: optimized_sanitize(&$bibidata.title),
+            subtitle: match &$bibidata.subtitle {
+                None => None,
+                Some(subtitle) => Some(optimized_sanitize(subtitle)),
+            },
+            abstract_text: optimized_sanitize(&$bibidata.abstract_text),
         }
     };
 }
@@ -96,20 +39,5 @@ macro_rules! optimized_sanitize_bibidata {
 /// Sanitize one BibiData and return a SanitizedBibiData struct.
 /// This function does ignore any existing sanitization of the bibidata struct.
 pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData {
-    let regex = regex_vector();
-    optimized_sanitize_bibidata!(bibidata, regex)
-}
-
-/// Sanitize a whole `Vec<BibiData>`, returning a new sanitized one.
-pub fn mass_sanitize(bibidata: Vec<BibiData>) -> Vec<BibiData> {
-    let regex: Vec<(Regex, &str)> = regex_vector();
-
-    let mut result: Vec<BibiData> = Vec::with_capacity(bibidata.len());
-    for entry in bibidata {
-        result.push(BibiData {
-            sanitized_bibi_data: Some(optimized_sanitize_bibidata!(entry, regex)),
-            ..entry
-        });
-    }
-    result
+    optimized_sanitize_bibidata!(bibidata)
 }
diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
new file mode 100644
index 0000000..b3bf90d
--- /dev/null
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -0,0 +1,86 @@
+// bibiman - a TUI for managing BibLaTeX databases
+// Copyright (C) 2025  lukeflo
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+/////
+
+use phf::phf_map;
+use std::collections::HashMap;
+
+use logos::Logos;
+
+static LOOKUP: phf::Map<&'static str, &'static str> = phf_map! {
+    " " => " ", // str a forced space should substitute to.
+};
+
+#[derive(Logos, Debug)]
+enum Token {
+    #[token("{")]
+    OpenCurlyBracket,
+    #[token("}")]
+    ClosedCurlyBracket,
+    #[regex(r"\\\w+")]
+    LaTeXMacro,
+    #[token(r"\ ")]
+    ForcedSpace,
+}
+
+pub fn optimized_sanitize(input_text: &str) -> String {
+    let mut out: Vec<&str> = Vec::new();
+    let mut bracket_counter: u32 = 0;
+    let mut counter_actions: HashMap<u32, String> = HashMap::new();
+    let mut lex = Token::lexer(input_text);
+    while let Some(sometoken) = lex.next() {
+        match sometoken {
+            Ok(token) => match token {
+                Token::ForcedSpace => {
+                    out.push(
+                        LOOKUP
+                            .get(" ")
+                            .expect("Something is wrong with the sanitization lookup table."),
+                    );
+                }
+                Token::OpenCurlyBracket => {
+                    bracket_counter.saturating_add(1);
+                    todo!();
+                }
+                Token::ClosedCurlyBracket => {
+                    bracket_counter.saturating_sub(1);
+                    todo!();
+                }
+                Token::LaTeXMacro => {
+                    todo!()
+                }
+            },
+            Err(_) => {
+                out.push(lex.slice());
+            }
+        }
+    }
+    out.into_iter().collect::<String>()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::optimized_sanitize;
+
+    #[test]
+    fn check_sanitization() {
+        let result = optimized_sanitize(
+            r"\mkbibquote{Intention} und \mkbibquote{Intentionen sind \hyphen\ bibquote.}",
+        );
+        println!("{}", result);
+        panic!("Tatütata!");
+    }
+}
-- 
cgit v1.2.3


From 3ba8f024577e52c51833cd34b07ad90d14cb6338 Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Sat, 4 Oct 2025 12:00:11 +0200
Subject: macro-sani: Implemented new algorithm to replace macros.

---
 src/bibiman/sanitize/optimized_sanitize.rs | 61 +++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
index b3bf90d..8788d39 100644
--- a/src/bibiman/sanitize/optimized_sanitize.rs
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -20,8 +20,13 @@ use std::collections::HashMap;
 
 use logos::Logos;
 
-static LOOKUP: phf::Map<&'static str, &'static str> = phf_map! {
-    " " => " ", // str a forced space should substitute to.
+static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = phf_map! {
+    r"\mkbibquote" => ("\"", Some("\"")),
+    r"\enquote*" => ("\'", Some("\'")),
+    r"\enquote" => ("\"", Some("\"")),
+    r"\hyphen" => ("-", None),
+    r"\textbf" => ("", Some("")),
+    r"\textit" => ("", Some("")),
 };
 
 #[derive(Logos, Debug)]
@@ -30,41 +35,59 @@ enum Token {
     OpenCurlyBracket,
     #[token("}")]
     ClosedCurlyBracket,
-    #[regex(r"\\\w+")]
+    #[regex(r"\\[\*\w]+")]
     LaTeXMacro,
     #[token(r"\ ")]
     ForcedSpace,
 }
 
 pub fn optimized_sanitize(input_text: &str) -> String {
-    let mut out: Vec<&str> = Vec::new();
+    let mut out: Vec<&str> = Vec::with_capacity(input_text.chars().count());
     let mut bracket_counter: u32 = 0;
-    let mut counter_actions: HashMap<u32, String> = HashMap::new();
+    let mut bc_up: bool = false;
+    let mut counter_actions: HashMap<u32, &str> = HashMap::new();
     let mut lex = Token::lexer(input_text);
     while let Some(sometoken) = lex.next() {
         match sometoken {
             Ok(token) => match token {
                 Token::ForcedSpace => {
-                    out.push(
-                        LOOKUP
-                            .get(" ")
-                            .expect("Something is wrong with the sanitization lookup table."),
-                    );
+                    out.push(" ");
+                    bc_up = false;
                 }
                 Token::OpenCurlyBracket => {
-                    bracket_counter.saturating_add(1);
-                    todo!();
+                    if bc_up {
+                        bracket_counter = bracket_counter.saturating_add(1);
+                    } else {
+                        out.push("{")
+                    }
                 }
                 Token::ClosedCurlyBracket => {
-                    bracket_counter.saturating_sub(1);
-                    todo!();
+                    if bracket_counter == 0 {
+                        out.push("}")
+                    } else {
+                        match counter_actions.remove(&bracket_counter) {
+                            None => out.push("}"),
+                            Some(a) => out.push(a),
+                        }
+                        bracket_counter = bracket_counter - 1;
+                    }
                 }
                 Token::LaTeXMacro => {
-                    todo!()
+                    let texmacro = lex.slice();
+                    if let Some(x) = LOOKUP.get(&texmacro) {
+                        if let Some(end) = x.1 {
+                            bc_up = true;
+                            counter_actions.insert(bracket_counter + 1, end);
+                        }
+                        out.push(x.0);
+                    } else {
+                        out.push(texmacro)
+                    }
                 }
             },
             Err(_) => {
                 out.push(lex.slice());
+                bc_up = false;
             }
         }
     }
@@ -78,9 +101,11 @@ mod tests {
     #[test]
     fn check_sanitization() {
         let result = optimized_sanitize(
-            r"\mkbibquote{Intention} und \mkbibquote{Intentionen sind \hyphen\ bibquote.}",
+            r"\mkbibquote{Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote.}",
         );
-        println!("{}", result);
-        panic!("Tatütata!");
+        assert_eq!(
+            "\"Intention\" und \"Intentionen \"sind\" - bibquote.\"",
+            result
+        )
     }
 }
-- 
cgit v1.2.3


From d80ce65ad5efb64fcce313a4c44b7f46fc5e7798 Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Sat, 4 Oct 2025 12:30:22 +0200
Subject: macro-sani: skipping the algorithm if no macro is in the string.

---
 src/bibiman/sanitize/optimized_sanitize.rs | 94 +++++++++++++++++-------------
 1 file changed, 53 insertions(+), 41 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
index 8788d39..3a9dc67 100644
--- a/src/bibiman/sanitize/optimized_sanitize.rs
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -42,56 +42,68 @@ enum Token {
 }
 
 pub fn optimized_sanitize(input_text: &str) -> String {
-    let mut out: Vec<&str> = Vec::with_capacity(input_text.chars().count());
-    let mut bracket_counter: u32 = 0;
-    let mut bc_up: bool = false;
-    let mut counter_actions: HashMap<u32, &str> = HashMap::new();
-    let mut lex = Token::lexer(input_text);
-    while let Some(sometoken) = lex.next() {
-        match sometoken {
-            Ok(token) => match token {
-                Token::ForcedSpace => {
-                    out.push(" ");
-                    bc_up = false;
-                }
-                Token::OpenCurlyBracket => {
-                    if bc_up {
-                        bracket_counter = bracket_counter.saturating_add(1);
-                    } else {
-                        out.push("{")
+    let mut char_counter: usize = 0;
+    let mut contains_macro: bool = false;
+    for char in input_text.chars() {
+        if char == '\\' {
+            contains_macro = true;
+        }
+        char_counter = char_counter.saturating_add(1);
+    }
+    if !contains_macro {
+        input_text.to_string()
+    } else {
+        let mut out: Vec<&str> = Vec::with_capacity(char_counter);
+        let mut bracket_counter: u32 = 0;
+        let mut bc_up: bool = false;
+        let mut counter_actions: HashMap<u32, &str> = HashMap::new();
+        let mut lex = Token::lexer(input_text);
+        while let Some(sometoken) = lex.next() {
+            match sometoken {
+                Ok(token) => match token {
+                    Token::ForcedSpace => {
+                        out.push(" ");
+                        bc_up = false;
                     }
-                }
-                Token::ClosedCurlyBracket => {
-                    if bracket_counter == 0 {
-                        out.push("}")
-                    } else {
-                        match counter_actions.remove(&bracket_counter) {
-                            None => out.push("}"),
-                            Some(a) => out.push(a),
+                    Token::OpenCurlyBracket => {
+                        if bc_up {
+                            bracket_counter = bracket_counter.saturating_add(1);
+                        } else {
+                            out.push("{")
                         }
-                        bracket_counter = bracket_counter - 1;
                     }
-                }
-                Token::LaTeXMacro => {
-                    let texmacro = lex.slice();
-                    if let Some(x) = LOOKUP.get(&texmacro) {
-                        if let Some(end) = x.1 {
-                            bc_up = true;
-                            counter_actions.insert(bracket_counter + 1, end);
+                    Token::ClosedCurlyBracket => {
+                        if bracket_counter == 0 {
+                            out.push("}")
+                        } else {
+                            match counter_actions.remove(&bracket_counter) {
+                                None => out.push("}"),
+                                Some(a) => out.push(a),
+                            }
+                            bracket_counter = bracket_counter - 1;
+                        }
+                    }
+                    Token::LaTeXMacro => {
+                        let texmacro = lex.slice();
+                        if let Some(x) = LOOKUP.get(&texmacro) {
+                            if let Some(end) = x.1 {
+                                bc_up = true;
+                                counter_actions.insert(bracket_counter + 1, end);
+                            }
+                            out.push(x.0);
+                        } else {
+                            out.push(texmacro)
                         }
-                        out.push(x.0);
-                    } else {
-                        out.push(texmacro)
                     }
+                },
+                Err(_) => {
+                    out.push(lex.slice());
+                    bc_up = false;
                 }
-            },
-            Err(_) => {
-                out.push(lex.slice());
-                bc_up = false;
             }
         }
+        out.into_iter().collect::<String>()
     }
-    out.into_iter().collect::<String>()
 }
 
 #[cfg(test)]
-- 
cgit v1.2.3


From f5adcd0fad71828646b5047c661a0d8524a3fc9c Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Sat, 4 Oct 2025 12:37:40 +0200
Subject: macro-sani: Fixed whitespace handling after latex macro.

---
 src/bibiman/sanitize/optimized_sanitize.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
index 3a9dc67..8ee0115 100644
--- a/src/bibiman/sanitize/optimized_sanitize.rs
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -35,7 +35,7 @@ enum Token {
     OpenCurlyBracket,
     #[token("}")]
     ClosedCurlyBracket,
-    #[regex(r"\\[\*\w]+")]
+    #[regex(r"\\[\*\w]+ ?")]
     LaTeXMacro,
     #[token(r"\ ")]
     ForcedSpace,
@@ -85,7 +85,7 @@ pub fn optimized_sanitize(input_text: &str) -> String {
                     }
                     Token::LaTeXMacro => {
                         let texmacro = lex.slice();
-                        if let Some(x) = LOOKUP.get(&texmacro) {
+                        if let Some(x) = LOOKUP.get(&texmacro.trim_end()) {
                             if let Some(end) = x.1 {
                                 bc_up = true;
                                 counter_actions.insert(bracket_counter + 1, end);
@@ -113,10 +113,10 @@ mod tests {
     #[test]
     fn check_sanitization() {
         let result = optimized_sanitize(
-            r"\mkbibquote{Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote.}",
+            r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}",
         );
         assert_eq!(
-            "\"Intention\" und \"Intentionen \"sind\" - bibquote.\"",
+            "\"Intention\" und \"Intentionen \"sind\" - bibquote-.\"",
             result
         )
     }
-- 
cgit v1.2.3


From 606716f064c1151ab9e8617ff76fd4b95f4a2c57 Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Sun, 5 Oct 2025 11:59:24 +0200
Subject: add functions to make sanitized data from PR #57 visible in the
 information tab too

---
 src/bibiman/bibisetup.rs | 22 +++++++++++++++++++---
 src/tui/ui.rs            |  6 +++---
 2 files changed, 22 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs
index 37b0b01..b3f788c 100644
--- a/src/bibiman/bibisetup.rs
+++ b/src/bibiman/bibisetup.rs
@@ -173,7 +173,11 @@ impl BibiData {
     }
 
     pub fn title(&self) -> &str {
-        &self.title
+        if let Some(sani_data) = &self.sanitized_bibi_data {
+            &sani_data.title
+        } else {
+            &self.title
+        }
     }
 
     pub fn year(&self) -> &str {
@@ -204,8 +208,20 @@ impl BibiData {
             .collect_vec()
     }
 
-    pub fn subtitle(&self) -> &str {
-        self.subtitle.as_ref().unwrap()
+    pub fn subtitle(&self) -> Option<&str> {
+        if let Some(sani_data) = &self.sanitized_bibi_data {
+            sani_data.subtitle.as_ref().map(|s| s.as_str())
+        } else {
+            self.subtitle.as_ref().map(|s| s.as_str())
+        }
+    }
+
+    pub fn get_abstract(&self) -> &str {
+        if let Some(sani_data) = &self.sanitized_bibi_data {
+            &sani_data.abstract_text
+        } else {
+            &self.abstract_text
+        }
     }
 
     fn create_symbols(&self, cfg: &BibiConfig) -> [Option<String>; 3] {
diff --git a/src/tui/ui.rs b/src/tui/ui.rs
index 3e6e24c..87d8c29 100644
--- a/src/tui/ui.rs
+++ b/src/tui/ui.rs
@@ -894,7 +894,7 @@ pub fn render_selected_item(app: &mut App, cfg: &BibiConfig, frame: &mut Frame,
                     Style::new().fg(cfg.colors.author_color),
                 ),
             ]));
-            if cur_entry.subtitle.is_some() {
+            if let Some(subtitle) = cur_entry.subtitle() {
                 lines.push(Line::from(vec![
                     Span::styled("Title: ", style_value),
                     Span::styled(
@@ -910,7 +910,7 @@ pub fn render_selected_item(app: &mut App, cfg: &BibiConfig, frame: &mut Frame,
                             .add_modifier(Modifier::ITALIC),
                     ),
                     Span::styled(
-                        cur_entry.subtitle(),
+                        subtitle,
                         Style::new()
                             .fg(cfg.colors.title_color)
                             .add_modifier(Modifier::ITALIC),
@@ -999,7 +999,7 @@ pub fn render_selected_item(app: &mut App, cfg: &BibiConfig, frame: &mut Frame,
             }
             lines.push(Line::from(""));
             lines.push(Line::from(vec![Span::styled(
-                cur_entry.abstract_text.clone(),
+                cur_entry.get_abstract(),
                 Style::new().fg(cfg.colors.main_text_color),
             )]));
             lines
-- 
cgit v1.2.3


From f84ebacd1ea47b09c58dd1ef1eaaf70feaacbe0f Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Sun, 5 Oct 2025 13:16:26 +0200
Subject: add some further text macros to be hidden

---
 src/bibiman/sanitize/optimized_sanitize.rs | 2 ++
 tests/biblatex-test.bib                    | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
index 8ee0115..336cc56 100644
--- a/src/bibiman/sanitize/optimized_sanitize.rs
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -27,6 +27,8 @@ static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = ph
     r"\hyphen" => ("-", None),
     r"\textbf" => ("", Some("")),
     r"\textit" => ("", Some("")),
+    r"\texttt" => ("", Some("")),
+    r"\textsc" => ("", Some("")),
 };
 
 #[derive(Logos, Debug)]
diff --git a/tests/biblatex-test.bib b/tests/biblatex-test.bib
index fcc5085..2149e7c 100644
--- a/tests/biblatex-test.bib
+++ b/tests/biblatex-test.bib
@@ -107,7 +107,7 @@
 }
 
 @mvbook{aristotle_rhetoric,
-    title = {The Rhetoric of {Aristotle} with a commentary by the late {Edward
+    title = {The \textbf{Rhetoric} of {Aristotle} with a commentary by the late {Edward
              Meredith Cope}},
     shorttitle = {Rhetoric},
     author = {Aristotle},
@@ -127,7 +127,7 @@
 }
 
 @book{augustine,
-    title = {Heterogeneous catalysis for the synthetic chemist},
+    title = {Heterogeneous catalysis for the synthetic \textit{chemist}},
     shorttitle = {Heterogeneous catalysis},
     author = {Augustine, Robert L.},
     location = {New York},
@@ -289,7 +289,7 @@
     annotation = {An \texttt{article} entry with an \texttt{eid} and a \texttt{
                   doi} field. Note that the \textsc{doi} is transformed into a
                   clickable link if \texttt{hyperref} support has been enabled},
-    abstract = {The computation of ionic solvation free energies from atomistic
+    abstract = {The computation of \texttt{ionic} solvation free energies from atomistic
                 simulations is a surprisingly difficult problem that has found no
                 satisfactory solution for more than 15 years. The reason is that
                 the charging free energies evaluated from such simulations are
-- 
cgit v1.2.3