From 7350c7382bda85618c3dae1d74cc8cbe7ddd4b9d Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Tue, 30 Sep 2025 21:49:31 +0200
Subject: Implemented basic sanitizing.
The default Regex crate does not support the required regex features, so
I'm using the fancy_regex crate that does.
---
src/bibiman.rs | 3 ++
src/bibiman/sanitize.rs | 73 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 76 insertions(+)
create mode 100644 src/bibiman/sanitize.rs
(limited to 'src')
diff --git a/src/bibiman.rs b/src/bibiman.rs
index 6d21f8c..c423ce1 100644
--- a/src/bibiman.rs
+++ b/src/bibiman.rs
@@ -44,6 +44,9 @@ pub mod entries;
pub mod keywords;
pub mod search;
+/// Module with function to sanitize text with LaTeX Macros into readable unicode text.
+pub mod sanitize;
+
// Areas in which actions are possible
#[derive(Debug)]
pub enum CurrentArea {
diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs
new file mode 100644
index 0000000..aaf81ad
--- /dev/null
+++ b/src/bibiman/sanitize.rs
@@ -0,0 +1,73 @@
+// bibiman - a TUI for managing BibLaTeX databases
+// Copyright (C) 2025 lukeflo
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+/////
+
+use fancy_regex::Regex;
+use unicodeit::replace as unicode_replace;
+
+/// Sanitizing process rules as regex cmds.
+///
+/// Only macros that are not already covered by unicodeit should be processed in this way.
+///
+// Regex to capture content between brackets: `(\{(?:[^{}]++|(\1))*+\})`
+// Alternative without capturing the outer brackets: `\{((?:(\{(?:[^}{]|(\1))*+\})|[^{])*?)\}`
+//
+const SANITIZE_REGEX: &[(&str, &str)] = &[
+ (
+ r"\\mkbibquote\{((?:(\{(?:[^}{]|(\1))*+\})|[^{])*?)\}",
+ "\"${1}\"",
+ ),
+ (r"\\hyphen", "-"),
+];
+
+/// Sanitize one String with LaTeX Macros into a more readable one without.
+///
+/// If one is going to mass-sanitize strings, one should use the [`sanitize`]
+/// function for performance reasons instead, to process multiple strings at once.
+///
+/// This is just a shortcut for the sanitize function.
+pub fn sanitize_one(input_text: &str) -> String {
+ // This does not panic, the sanitize function always returns
+ // as many elements in the returned list as it get's elements
+ // in the input vector.
+ sanitize(vec![input_text]).get(0).unwrap().to_string()
+}
+
+/// Sanitize multiple Strings with LaTeX Macros into more readable unicode equivalents.
+///
+/// This function does always return the same amount of Strings as it gets in the input list.
+pub fn sanitize(input_text: Vec<&str>) -> Vec<String> {
+ let mut result: Vec<String> = input_text.into_iter().map(|s| s.to_string()).collect();
+ let mut regex: Vec<(Regex, &str)> = Vec::with_capacity(SANITIZE_REGEX.len());
+ // build regex
+ for (search, replace) in SANITIZE_REGEX {
+ regex.push((Regex::new(search).unwrap(), replace));
+ }
+
+ // process strings
+ let result_len = result.len();
+ for (re, replace) in regex {
+ for i in 0..result_len {
+ result[i] = re.replace_all(&result[i], replace).to_string();
+ }
+ }
+ for i in 0..result_len {
+ result[i] = unicode_replace(&result[i]);
+ }
+
+ // return result
+ result
+}
--
cgit v1.2.3
From dfb7edde13ca39af3e23b80e40272e02aa093919 Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Fri, 3 Oct 2025 12:37:16 +0200
Subject: Sanitization hooked into bibiman.
---
src/bibiman/bibisetup.rs | 27 ++++++++++++++++++++++++---
1 file changed, 24 insertions(+), 3 deletions(-)
(limited to 'src')
diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs
index 3bcb717..8466169 100644
--- a/src/bibiman/bibisetup.rs
+++ b/src/bibiman/bibisetup.rs
@@ -26,6 +26,7 @@ use std::{fs, path::PathBuf};
use walkdir::WalkDir;
use crate::app;
+use crate::bibiman::sanitize::sanitize_one;
use crate::cliargs::{self};
use crate::config::BibiConfig;
@@ -294,11 +295,28 @@ impl BibiSetup {
let filepaths: (Option>, bool) =
{ Self::get_filepath(k, bibliography, &mut pdf_files) };
+ // bibiman will sanitize some fields at this point,
+ // this may cause longer startup-load-times.
+ //
+ // It may be better to sanitize them somewhere else, so bibiman
+ // does not loose the original text-information including the
+ // LaTeX macros present in the bibfile. From here on, they will be
+ // gone.
+ //
+ // The following fields are going to be sanitized:
+ //
+ // - title
+ // - subtitle
+ // - abstract_text
+ //
+ // TODO: Once the final decision to implement the sanitization at
+ // this point, one could write a constructor for the BibiData struct
+ // which handles the sanitization.
BibiData {
id: i as u32,
authors: Self::get_authors(k, bibliography),
short_author: String::new(),
- title: Self::get_title(k, bibliography),
+ title: sanitize_one(&Self::get_title(k, bibliography)),
year: Self::get_year(k, bibliography),
custom_field: (
cfg.general.custom_column.clone(),
@@ -306,11 +324,14 @@ impl BibiSetup {
),
keywords: Self::get_keywords(k, bibliography),
citekey: k.to_owned(),
- abstract_text: Self::get_abstract(k, bibliography),
+ abstract_text: sanitize_one(&Self::get_abstract(k, bibliography)),
doi_url: Self::get_weblink(k, bibliography),
filepath: filepaths.0,
file_field: filepaths.1,
- subtitle: Self::get_subtitle(k, bibliography),
+ subtitle: match Self::get_subtitle(k, bibliography) {
+ None => None,
+ Some(x) => Some(sanitize_one(&x)),
+ },
notes: if note_files.is_some() {
Self::get_notepath(k, &mut note_files, &ext)
} else {
--
cgit v1.2.3
From 26befd38aedbfdd278c3096644baf69e4a1fb051 Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Fri, 3 Oct 2025 16:56:30 +0200
Subject: Now storing the sanitized data separately, keeping the original.
---
src/bibiman/bibisetup.rs | 154 ++++++++++++++++++++++++++---------------------
src/bibiman/entries.rs | 8 ++-
src/bibiman/sanitize.rs | 96 +++++++++++++++++++++++------
src/bibiman/search.rs | 4 +-
4 files changed, 175 insertions(+), 87 deletions(-)
(limited to 'src')
diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs
index 8466169..48046e9 100644
--- a/src/bibiman/bibisetup.rs
+++ b/src/bibiman/bibisetup.rs
@@ -26,7 +26,7 @@ use std::{fs, path::PathBuf};
use walkdir::WalkDir;
use crate::app;
-use crate::bibiman::sanitize::sanitize_one;
+use crate::bibiman::sanitize::{mass_sanitize, sanitize_one_bibidata};
use crate::cliargs::{self};
use crate::config::BibiConfig;
@@ -77,6 +77,18 @@ pub struct BibiData {
pub subtitle: Option<String>,
pub notes: Option>,
pub symbols: [Option; 3],
+ /// This field should be set to None when initially creating a BibiData instance.
+ /// It then can be generated from the constructed BibiData Object using
+ /// `BibiData::gen_sanitized()`
+ pub sanitized_bibi_data: Option<SanitizedBibiData>,
+}
+
+/// Struct that holds sanitized bibidata data.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct SanitizedBibiData {
+ pub title: String,
+ pub subtitle: Option<String>,
+ pub abstract_text: String,
}
#[derive(Debug, Clone, PartialEq)]
@@ -120,22 +132,41 @@ impl BibiData {
// self.pubtype(),
// &self.symbols,
// ]
-
- BibiRow {
- authors: {
- if self.short_author.is_empty() {
- self.authors()
- } else {
- &self.short_author
- }
- },
- title: self.title(),
- year: self.year(),
- custom_field_value: self.custom_field_value(),
- symbols: &self.symbols,
+ let author_ref = if self.short_author.is_empty() {
+ self.authors()
+ } else {
+ &self.short_author
+ };
+ if let Some(sanidata) = &self.sanitized_bibi_data {
+ BibiRow {
+ authors: author_ref,
+ title: &sanidata.title,
+ year: self.year(),
+ custom_field_value: self.custom_field_value(),
+ symbols: &self.symbols,
+ }
+ } else {
+ BibiRow {
+ authors: author_ref,
+ title: self.title(),
+ year: self.year(),
+ custom_field_value: self.custom_field_value(),
+ symbols: &self.symbols,
+ }
}
}
+ /// Generates the SanitizedBibiData for the BibiData.
+ ///
+ /// Consumes self and returns a new BibiData struct.
+ ///
+ /// If multiple SanitizedBibiData are to be generated,
+ /// one should use the [`mass_sanitize`] function instead.
+ pub fn gen_sanitized(mut self) -> Self {
+ self.sanitized_bibi_data = Some(sanitize_one_bibidata(&self));
+ self
+ }
+
pub fn entry_id(&self) -> &u32 {
&self.id
}
@@ -288,59 +319,48 @@ impl BibiSetup {
} else {
None
};
- citekeys
- .iter()
- .enumerate()
- .map(|(i, k)| {
- let filepaths: (Option>, bool) =
- { Self::get_filepath(k, bibliography, &mut pdf_files) };
-
- // bibiman will sanitize some fields at this point,
- // this may cause longer startup-load-times.
- //
- // It may be better to sanitize them somewhere else, so bibiman
- // does not loose the original text-information including the
- // LaTeX macros present in the bibfile. From here on, they will be
- // gone.
- //
- // The following fields are going to be sanitized:
- //
- // - title
- // - subtitle
- // - abstract_text
- //
- // TODO: Once the final decision to implement the sanitization at
- // this point, one could write a constructor for the BibiData struct
- // which handles the sanitization.
- BibiData {
- id: i as u32,
- authors: Self::get_authors(k, bibliography),
- short_author: String::new(),
- title: sanitize_one(&Self::get_title(k, bibliography)),
- year: Self::get_year(k, bibliography),
- custom_field: (
- cfg.general.custom_column.clone(),
- Self::get_custom_field(k, bibliography, &cfg.general.custom_column),
- ),
- keywords: Self::get_keywords(k, bibliography),
- citekey: k.to_owned(),
- abstract_text: sanitize_one(&Self::get_abstract(k, bibliography)),
- doi_url: Self::get_weblink(k, bibliography),
- filepath: filepaths.0,
- file_field: filepaths.1,
- subtitle: match Self::get_subtitle(k, bibliography) {
- None => None,
- Some(x) => Some(sanitize_one(&x)),
- },
- notes: if note_files.is_some() {
- Self::get_notepath(k, &mut note_files, &ext)
- } else {
- None
- },
- symbols: [None, None, None],
- }
- })
- .collect()
+ //
+ //
+ // bibiman will sanitize some fields at this point,
+ // this may cause longer startup-load-times.
+ //
+ //
+ mass_sanitize(
+ citekeys
+ .iter()
+ .enumerate()
+ .map(|(i, k)| {
+ let filepaths: (Option>, bool) =
+ { Self::get_filepath(k, bibliography, &mut pdf_files) };
+
+ BibiData {
+ id: i as u32,
+ authors: Self::get_authors(k, bibliography),
+ short_author: String::new(),
+ title: Self::get_title(k, bibliography),
+ year: Self::get_year(k, bibliography),
+ custom_field: (
+ cfg.general.custom_column.clone(),
+ Self::get_custom_field(k, bibliography, &cfg.general.custom_column),
+ ),
+ keywords: Self::get_keywords(k, bibliography),
+ citekey: k.to_owned(),
+ abstract_text: Self::get_abstract(k, bibliography),
+ doi_url: Self::get_weblink(k, bibliography),
+ filepath: filepaths.0,
+ file_field: filepaths.1,
+ subtitle: Self::get_subtitle(k, bibliography),
+ notes: if note_files.is_some() {
+ Self::get_notepath(k, &mut note_files, &ext)
+ } else {
+ None
+ },
+ symbols: [None, None, None],
+ sanitized_bibi_data: None,
+ }
+ })
+ .collect(),
+ )
}
// get list of citekeys from the given bibfile
diff --git a/src/bibiman/entries.rs b/src/bibiman/entries.rs
index db6d6bf..0b35a8b 100644
--- a/src/bibiman/entries.rs
+++ b/src/bibiman/entries.rs
@@ -174,7 +174,9 @@ mod tests {
subtitle: None,
notes: None,
symbols: [None, None, None],
- };
+ sanitized_bibi_data: None,
+ }
+ .gen_sanitized();
let entry_vec = BibiData::ref_vec(&mut entry, &cfg);
@@ -194,7 +196,9 @@ mod tests {
subtitle: None,
notes: None,
symbols: [None, None, None],
- };
+ sanitized_bibi_data: None,
+ }
+ .gen_sanitized();
let entry_vec_editors = BibiData::ref_vec(&mut entry_editors, &cfg);
diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs
index aaf81ad..614ed11 100644
--- a/src/bibiman/sanitize.rs
+++ b/src/bibiman/sanitize.rs
@@ -18,6 +18,8 @@
use fancy_regex::Regex;
use unicodeit::replace as unicode_replace;
+use crate::bibiman::bibisetup::{BibiData, SanitizedBibiData};
+
/// Sanitizing process rules as regex cmds.
///
/// Only macros that are not already covered by unicodeit should be processed in this way.
@@ -33,6 +35,71 @@ const SANITIZE_REGEX: &[(&str, &str)] = &[
(r"\\hyphen", "-"),
];
+/// Function to build the sanitization regex vector:
+fn regex_vector() -> Vec<(Regex, &'static str)> {
+ let mut regex: Vec<(Regex, &str)> = Vec::with_capacity(SANITIZE_REGEX.len());
+ // build regex
+ for (search, replace) in SANITIZE_REGEX {
+ regex.push((Regex::new(search).unwrap(), replace));
+ }
+ regex
+}
+
+fn optimized_sanitize(input_text: Vec<&str>, regex: &Vec<(Regex, &str)>) -> Vec<String> {
+ let mut result: Vec<String> = input_text.into_iter().map(|s| s.to_string()).collect();
+
+ // process strings
+ let result_len = result.len();
+ for (re, replace) in regex {
+ for i in 0..result_len {
+ result[i] = re.replace_all(&result[i], *replace).to_string();
+ }
+ }
+ for i in 0..result_len {
+ result[i] = unicode_replace(&result[i]);
+ }
+
+ // return result
+ result
+}
+
+/// Helper macro to sanitize bibidata structs.
+/// Here lives the code that generates SanitizedBibiData
+/// structs from BibiData structs.
+macro_rules! optimized_sanitize_bibidata {
+ ($bibidata:expr, $regex:expr) => {
+ match &$bibidata.subtitle {
+ None => {
+ let sanitized_data =
+ optimized_sanitize(vec![&$bibidata.title, &$bibidata.abstract_text], &$regex);
+ SanitizedBibiData {
+ title: sanitized_data[0].clone(),
+ subtitle: None,
+ abstract_text: sanitized_data[1].clone(),
+ }
+ }
+ Some(subtitle) => {
+ let sanitized_data = optimized_sanitize(
+ vec![&$bibidata.title, subtitle, &$bibidata.abstract_text],
+ &$regex,
+ );
+ SanitizedBibiData {
+ title: sanitized_data[0].clone(),
+ subtitle: Some(sanitized_data[1].clone()),
+ abstract_text: sanitized_data[2].clone(),
+ }
+ }
+ }
+ };
+}
+
+/// Sanitize one BibiData and return a SanitizedBibiData struct.
+/// This function does ignore any existing sanitization of the bibidata struct.
+pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData {
+ let regex = regex_vector();
+ optimized_sanitize_bibidata!(bibidata, regex)
+}
+
/// Sanitize one String with LaTeX Macros into a more readable one without.
///
/// If one is going to mass-sanitize strings, one should use the [`sanitize`]
@@ -50,24 +117,19 @@ pub fn sanitize_one(input_text: &str) -> String {
///
/// This function does always return the same amount of Strings as it gets in the input list.
pub fn sanitize(input_text: Vec<&str>) -> Vec<String> {
- let mut result: Vec<String> = input_text.into_iter().map(|s| s.to_string()).collect();
- let mut regex: Vec<(Regex, &str)> = Vec::with_capacity(SANITIZE_REGEX.len());
- // build regex
- for (search, replace) in SANITIZE_REGEX {
- regex.push((Regex::new(search).unwrap(), replace));
- }
+ optimized_sanitize(input_text, &regex_vector())
+}
- // process strings
- let result_len = result.len();
- for (re, replace) in regex {
- for i in 0..result_len {
- result[i] = re.replace_all(&result[i], replace).to_string();
- }
- }
- for i in 0..result_len {
- result[i] = unicode_replace(&result[i]);
- }
+/// Sanitize a whole `Vec<BibiData>`, returning a new sanitized one.
+pub fn mass_sanitize(bibidata: Vec<BibiData>) -> Vec<BibiData> {
+ let regex: Vec<(Regex, &str)> = regex_vector();
- // return result
+ let mut result: Vec<BibiData> = Vec::with_capacity(bibidata.len());
+ for entry in bibidata {
+ result.push(BibiData {
+ sanitized_bibi_data: Some(optimized_sanitize_bibidata!(entry, regex)),
+ ..entry
+ });
+ }
result
}
diff --git a/src/bibiman/search.rs b/src/bibiman/search.rs
index e0c5f17..2156634 100644
--- a/src/bibiman/search.rs
+++ b/src/bibiman/search.rs
@@ -141,7 +141,9 @@ mod tests {
subtitle: None,
notes: None,
symbols: [None, None, None],
- };
+ sanitized_bibi_data: None,
+ }
+ .gen_sanitized();
let joined_vec = BibiSearch::convert_to_string(&bibvec);
--
cgit v1.2.3
From 161fc7010cb863e1af534ce1d173136401816a32 Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Fri, 3 Oct 2025 18:19:25 +0200
Subject: Removed unused sanitization functions.
---
src/bibiman/sanitize.rs | 20 --------------------
1 file changed, 20 deletions(-)
(limited to 'src')
diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs
index 614ed11..823b91c 100644
--- a/src/bibiman/sanitize.rs
+++ b/src/bibiman/sanitize.rs
@@ -100,26 +100,6 @@ pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData {
optimized_sanitize_bibidata!(bibidata, regex)
}
-/// Sanitize one String with LaTeX Macros into a more readable one without.
-///
-/// If one is going to mass-sanitize strings, one should use the [`sanitize`]
-/// function for performance reasons instead, to process multiple strings at once.
-///
-/// This is just a shortcut for the sanitize function.
-pub fn sanitize_one(input_text: &str) -> String {
- // This does not panic, the sanitize function always returns
- // as many elements in the returned list as it get's elements
- // in the input vector.
- sanitize(vec![input_text]).get(0).unwrap().to_string()
-}
-
-/// Sanitize multiple Strings with LaTeX Macros into more readable unicode equivalents.
-///
-/// This function does always return the same amount of Strings as it gets in the input list.
-pub fn sanitize(input_text: Vec<&str>) -> Vec<String> {
- optimized_sanitize(input_text, &regex_vector())
-}
-
/// Sanitize a whole `Vec<BibiData>`, returning a new sanitized one.
pub fn mass_sanitize(bibidata: Vec<BibiData>) -> Vec<BibiData> {
let regex: Vec<(Regex, &str)> = regex_vector();
--
cgit v1.2.3
From 624977bb9fd209b0c7c5f60a1332718de1d460d4 Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Fri, 3 Oct 2025 22:57:37 +0200
Subject: macro-sani: started impl new algorithm
---
Cargo.lock | 146 +++++++++++++++++++++--------
Cargo.toml | 4 +-
src/bibiman/bibisetup.rs | 76 +++++++--------
src/bibiman/sanitize.rs | 94 +++----------------
src/bibiman/sanitize/optimized_sanitize.rs | 86 +++++++++++++++++
5 files changed, 241 insertions(+), 165 deletions(-)
create mode 100644 src/bibiman/sanitize/optimized_sanitize.rs
(limited to 'src')
diff --git a/Cargo.lock b/Cargo.lock
index 535b929..22a5a48 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -89,6 +89,12 @@ version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
+[[package]]
+name = "beef"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
+
[[package]]
name = "bibiman"
version = "0.14.1"
@@ -99,13 +105,14 @@ dependencies = [
"crossterm",
"dirs",
"editor-command",
- "fancy-regex",
"figment",
"futures",
"itertools",
"lexopt",
+ "logos",
"nucleo-matcher",
"owo-colors",
+ "phf",
"rand",
"ratatui",
"regex",
@@ -114,7 +121,6 @@ dependencies = [
"tokio",
"tokio-util",
"tui-input",
- "unicodeit",
"ureq",
"walkdir",
]
@@ -132,21 +138,6 @@ dependencies = [
"unscanny",
]
-[[package]]
-name = "bit-set"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
-dependencies = [
- "bit-vec",
-]
-
-[[package]]
-name = "bit-vec"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
-
[[package]]
name = "bitflags"
version = "1.3.2"
@@ -426,17 +417,6 @@ dependencies = [
"once_cell",
]
-[[package]]
-name = "fancy-regex"
-version = "0.16.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "998b056554fbe42e03ae0e152895cd1a7e1002aec800fdc6635d20270260c46f"
-dependencies = [
- "bit-set",
- "regex-automata",
- "regex-syntax",
-]
-
[[package]]
name = "fastrand"
version = "2.3.0"
@@ -893,6 +873,40 @@ version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
+[[package]]
+name = "logos"
+version = "0.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff472f899b4ec2d99161c51f60ff7075eeb3097069a36050d8037a6325eb8154"
+dependencies = [
+ "logos-derive",
+]
+
+[[package]]
+name = "logos-codegen"
+version = "0.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "192a3a2b90b0c05b27a0b2c43eecdb7c415e29243acc3f89cc8247a5b693045c"
+dependencies = [
+ "beef",
+ "fnv",
+ "lazy_static",
+ "proc-macro2",
+ "quote",
+ "regex-syntax",
+ "rustc_version",
+ "syn",
+]
+
+[[package]]
+name = "logos-derive"
+version = "0.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "605d9697bcd5ef3a42d38efc51541aa3d6a4a25f7ab6d1ed0da5ac632a26b470"
+dependencies = [
+ "logos-codegen",
+]
+
[[package]]
name = "lru"
version = "0.12.5"
@@ -1126,6 +1140,49 @@ dependencies = [
"indexmap",
]
+[[package]]
+name = "phf"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
+dependencies = [
+ "phf_macros",
+ "phf_shared",
+ "serde",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737"
+dependencies = [
+ "fastrand",
+ "phf_shared",
+]
+
+[[package]]
+name = "phf_macros"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef"
+dependencies = [
+ "phf_generator",
+ "phf_shared",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266"
+dependencies = [
+ "siphasher",
+]
+
[[package]]
name = "pin-project-lite"
version = "0.2.16"
@@ -1329,6 +1386,15 @@ version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
+[[package]]
+name = "rustc_version"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
+dependencies = [
+ "semver",
+]
+
[[package]]
name = "rustix"
version = "0.38.44"
@@ -1417,6 +1483,12 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+[[package]]
+name = "semver"
+version = "1.0.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2"
+
[[package]]
name = "serde"
version = "1.0.219"
@@ -1503,6 +1575,12 @@ version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
+[[package]]
+name = "siphasher"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
+
[[package]]
name = "slab"
version = "0.4.10"
@@ -1891,18 +1969,6 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
-[[package]]
-name = "unicodeit"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1069c222ea63347e2e59763aa12d32c9c6a4e595931c7724a769f6a75bfbc553"
-dependencies = [
- "aho-corasick",
- "cfg-if",
- "memchr",
- "regex",
-]
-
[[package]]
name = "unscanny"
version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 2d596de..a01a7e7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -39,5 +39,5 @@ ureq = "2.12.1"
serde = { version = "1.0.217", features = ["serde_derive"] }
figment = { version = "0.10.19", features = [ "toml", "test" ]}
owo-colors = "4.2.2"
-unicodeit = { version = "0.2.0", features = ["naive-impl"] }
-fancy-regex = "0.16.2"
+logos = "0.15.1"
+phf = { version = "0.13.1", features = ["macros"] }
diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs
index 48046e9..37b0b01 100644
--- a/src/bibiman/bibisetup.rs
+++ b/src/bibiman/bibisetup.rs
@@ -26,7 +26,7 @@ use std::{fs, path::PathBuf};
use walkdir::WalkDir;
use crate::app;
-use crate::bibiman::sanitize::{mass_sanitize, sanitize_one_bibidata};
+use crate::bibiman::sanitize::sanitize_one_bibidata;
use crate::cliargs::{self};
use crate::config::BibiConfig;
@@ -159,9 +159,6 @@ impl BibiData {
/// Generates the SanitizedBibiData for the BibiData.
///
/// Consumes self and returns a new BibiData struct.
- ///
- /// If multiple SanitizedBibiData are to be generated,
- /// one should use the [`mass_sanitize`] function instead.
pub fn gen_sanitized(mut self) -> Self {
self.sanitized_bibi_data = Some(sanitize_one_bibidata(&self));
self
@@ -325,42 +322,41 @@ impl BibiSetup {
// this may cause longer startup-load-times.
//
//
- mass_sanitize(
- citekeys
- .iter()
- .enumerate()
- .map(|(i, k)| {
- let filepaths: (Option>, bool) =
- { Self::get_filepath(k, bibliography, &mut pdf_files) };
-
- BibiData {
- id: i as u32,
- authors: Self::get_authors(k, bibliography),
- short_author: String::new(),
- title: Self::get_title(k, bibliography),
- year: Self::get_year(k, bibliography),
- custom_field: (
- cfg.general.custom_column.clone(),
- Self::get_custom_field(k, bibliography, &cfg.general.custom_column),
- ),
- keywords: Self::get_keywords(k, bibliography),
- citekey: k.to_owned(),
- abstract_text: Self::get_abstract(k, bibliography),
- doi_url: Self::get_weblink(k, bibliography),
- filepath: filepaths.0,
- file_field: filepaths.1,
- subtitle: Self::get_subtitle(k, bibliography),
- notes: if note_files.is_some() {
- Self::get_notepath(k, &mut note_files, &ext)
- } else {
- None
- },
- symbols: [None, None, None],
- sanitized_bibi_data: None,
- }
- })
- .collect(),
- )
+ citekeys
+ .iter()
+ .enumerate()
+ .map(|(i, k)| {
+ let filepaths: (Option>, bool) =
+ { Self::get_filepath(k, bibliography, &mut pdf_files) };
+
+ BibiData {
+ id: i as u32,
+ authors: Self::get_authors(k, bibliography),
+ short_author: String::new(),
+ title: Self::get_title(k, bibliography),
+ year: Self::get_year(k, bibliography),
+ custom_field: (
+ cfg.general.custom_column.clone(),
+ Self::get_custom_field(k, bibliography, &cfg.general.custom_column),
+ ),
+ keywords: Self::get_keywords(k, bibliography),
+ citekey: k.to_owned(),
+ abstract_text: Self::get_abstract(k, bibliography),
+ doi_url: Self::get_weblink(k, bibliography),
+ filepath: filepaths.0,
+ file_field: filepaths.1,
+ subtitle: Self::get_subtitle(k, bibliography),
+ notes: if note_files.is_some() {
+ Self::get_notepath(k, &mut note_files, &ext)
+ } else {
+ None
+ },
+ symbols: [None, None, None],
+ sanitized_bibi_data: None,
+ }
+ .gen_sanitized()
+ })
+ .collect()
}
// get list of citekeys from the given bibfile
diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs
index 823b91c..9ccf4c4 100644
--- a/src/bibiman/sanitize.rs
+++ b/src/bibiman/sanitize.rs
@@ -15,80 +15,23 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>.
/////
-use fancy_regex::Regex;
-use unicodeit::replace as unicode_replace;
-
use crate::bibiman::bibisetup::{BibiData, SanitizedBibiData};
-/// Sanitizing process rules as regex cmds.
-///
-/// Only macros that are not already covered by unicodeit should be processed in this way.
-///
-// Regex to capture content between brackets: `(\{(?:[^{}]++|(\1))*+\})`
-// Alternative without capturing the outer brackets: `\{((?:(\{(?:[^}{]|(\1))*+\})|[^{])*?)\}`
-//
-const SANITIZE_REGEX: &[(&str, &str)] = &[
- (
- r"\\mkbibquote\{((?:(\{(?:[^}{]|(\1))*+\})|[^{])*?)\}",
- "\"${1}\"",
- ),
- (r"\\hyphen", "-"),
-];
-
-/// Function to build the sanitization regex vector:
-fn regex_vector() -> Vec<(Regex, &'static str)> {
- let mut regex: Vec<(Regex, &str)> = Vec::with_capacity(SANITIZE_REGEX.len());
- // build regex
- for (search, replace) in SANITIZE_REGEX {
- regex.push((Regex::new(search).unwrap(), replace));
- }
- regex
-}
-
-fn optimized_sanitize(input_text: Vec<&str>, regex: &Vec<(Regex, &str)>) -> Vec<String> {
- let mut result: Vec<String> = input_text.into_iter().map(|s| s.to_string()).collect();
-
- // process strings
- let result_len = result.len();
- for (re, replace) in regex {
- for i in 0..result_len {
- result[i] = re.replace_all(&result[i], *replace).to_string();
- }
- }
- for i in 0..result_len {
- result[i] = unicode_replace(&result[i]);
- }
-
- // return result
- result
-}
+mod optimized_sanitize;
+use optimized_sanitize::optimized_sanitize;
/// Helper macro to sanitize bibidata structs.
/// Here lives the code that generates SanitizedBibiData
/// structs from BibiData structs.
macro_rules! optimized_sanitize_bibidata {
- ($bibidata:expr, $regex:expr) => {
- match &$bibidata.subtitle {
- None => {
- let sanitized_data =
- optimized_sanitize(vec![&$bibidata.title, &$bibidata.abstract_text], &$regex);
- SanitizedBibiData {
- title: sanitized_data[0].clone(),
- subtitle: None,
- abstract_text: sanitized_data[1].clone(),
- }
- }
- Some(subtitle) => {
- let sanitized_data = optimized_sanitize(
- vec![&$bibidata.title, subtitle, &$bibidata.abstract_text],
- &$regex,
- );
- SanitizedBibiData {
- title: sanitized_data[0].clone(),
- subtitle: Some(sanitized_data[1].clone()),
- abstract_text: sanitized_data[2].clone(),
- }
- }
+ ($bibidata:expr) => {
+ SanitizedBibiData {
+ title: optimized_sanitize(&$bibidata.title),
+ subtitle: match &$bibidata.subtitle {
+ None => None,
+ Some(subtitle) => Some(optimized_sanitize(subtitle)),
+ },
+ abstract_text: optimized_sanitize(&$bibidata.abstract_text),
}
};
}
@@ -96,20 +39,5 @@ macro_rules! optimized_sanitize_bibidata {
/// Sanitize one BibiData and return a SanitizedBibiData struct.
/// This function does ignore any existing sanitization of the bibidata struct.
pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData {
- let regex = regex_vector();
- optimized_sanitize_bibidata!(bibidata, regex)
-}
-
-/// Sanitize a whole `Vec<BibiData>`, returning a new sanitized one.
-pub fn mass_sanitize(bibidata: Vec<BibiData>) -> Vec<BibiData> {
- let regex: Vec<(Regex, &str)> = regex_vector();
-
- let mut result: Vec<BibiData> = Vec::with_capacity(bibidata.len());
- for entry in bibidata {
- result.push(BibiData {
- sanitized_bibi_data: Some(optimized_sanitize_bibidata!(entry, regex)),
- ..entry
- });
- }
- result
+ optimized_sanitize_bibidata!(bibidata)
}
diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
new file mode 100644
index 0000000..b3bf90d
--- /dev/null
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -0,0 +1,86 @@
+// bibiman - a TUI for managing BibLaTeX databases
+// Copyright (C) 2025 lukeflo
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see .
+/////
+
+use phf::phf_map;
+use std::collections::HashMap;
+
+use logos::Logos;
+
+static LOOKUP: phf::Map<&'static str, &'static str> = phf_map! {
+ " " => " ", // str a forced space should substitute to.
+};
+
+#[derive(Logos, Debug)]
+enum Token {
+ #[token("{")]
+ OpenCurlyBracket,
+ #[token("}")]
+ ClosedCurlyBracket,
+ #[regex(r"\\\w+")]
+ LaTeXMacro,
+ #[token(r"\ ")]
+ ForcedSpace,
+}
+
+pub fn optimized_sanitize(input_text: &str) -> String {
+ let mut out: Vec<&str> = Vec::new();
+ let mut bracket_counter: u32 = 0;
+ let mut counter_actions: HashMap = HashMap::new();
+ let mut lex = Token::lexer(input_text);
+ while let Some(sometoken) = lex.next() {
+ match sometoken {
+ Ok(token) => match token {
+ Token::ForcedSpace => {
+ out.push(
+ LOOKUP
+ .get(" ")
+ .expect("Something is wrong with the sanitization lookup table."),
+ );
+ }
+ Token::OpenCurlyBracket => {
+ bracket_counter.saturating_add(1);
+ todo!();
+ }
+ Token::ClosedCurlyBracket => {
+ bracket_counter.saturating_sub(1);
+ todo!();
+ }
+ Token::LaTeXMacro => {
+ todo!()
+ }
+ },
+ Err(_) => {
+ out.push(lex.slice());
+ }
+ }
+ }
+ out.into_iter().collect::()
+}
+
+#[cfg(test)]
+mod tests {
+ use super::optimized_sanitize;
+
+ #[test]
+ fn check_sanitization() {
+ let result = optimized_sanitize(
+ r"\mkbibquote{Intention} und \mkbibquote{Intentionen sind \hyphen\ bibquote.}",
+ );
+ println!("{}", result);
+ panic!("Tatütata!");
+ }
+}
--
cgit v1.2.3
From 3ba8f024577e52c51833cd34b07ad90d14cb6338 Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Sat, 4 Oct 2025 12:00:11 +0200
Subject: macro-sani: Implemented new algorithm to replace macros.
---
src/bibiman/sanitize/optimized_sanitize.rs | 61 +++++++++++++++++++++---------
1 file changed, 43 insertions(+), 18 deletions(-)
(limited to 'src')
diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
index b3bf90d..8788d39 100644
--- a/src/bibiman/sanitize/optimized_sanitize.rs
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -20,8 +20,13 @@ use std::collections::HashMap;
use logos::Logos;
-static LOOKUP: phf::Map<&'static str, &'static str> = phf_map! {
- " " => " ", // str a forced space should substitute to.
+static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = phf_map! {
+ r"\mkbibquote" => ("\"", Some("\"")),
+ r"\enquote*" => ("\'", Some("\'")),
+ r"\enquote" => ("\"", Some("\"")),
+ r"\hyphen" => ("-", None),
+ r"\textbf" => ("", Some("")),
+ r"\textit" => ("", Some("")),
};
#[derive(Logos, Debug)]
@@ -30,41 +35,59 @@ enum Token {
OpenCurlyBracket,
#[token("}")]
ClosedCurlyBracket,
- #[regex(r"\\\w+")]
+ #[regex(r"\\[\*\w]+")]
LaTeXMacro,
#[token(r"\ ")]
ForcedSpace,
}
pub fn optimized_sanitize(input_text: &str) -> String {
- let mut out: Vec<&str> = Vec::new();
+ let mut out: Vec<&str> = Vec::with_capacity(input_text.chars().count());
let mut bracket_counter: u32 = 0;
- let mut counter_actions: HashMap = HashMap::new();
+ let mut bc_up: bool = false;
+ let mut counter_actions: HashMap = HashMap::new();
let mut lex = Token::lexer(input_text);
while let Some(sometoken) = lex.next() {
match sometoken {
Ok(token) => match token {
Token::ForcedSpace => {
- out.push(
- LOOKUP
- .get(" ")
- .expect("Something is wrong with the sanitization lookup table."),
- );
+ out.push(" ");
+ bc_up = false;
}
Token::OpenCurlyBracket => {
- bracket_counter.saturating_add(1);
- todo!();
+ if bc_up {
+ bracket_counter = bracket_counter.saturating_add(1);
+ } else {
+ out.push("{")
+ }
}
Token::ClosedCurlyBracket => {
- bracket_counter.saturating_sub(1);
- todo!();
+ if bracket_counter == 0 {
+ out.push("}")
+ } else {
+ match counter_actions.remove(&bracket_counter) {
+ None => out.push("}"),
+ Some(a) => out.push(a),
+ }
+ bracket_counter = bracket_counter - 1;
+ }
}
Token::LaTeXMacro => {
- todo!()
+ let texmacro = lex.slice();
+ if let Some(x) = LOOKUP.get(&texmacro) {
+ if let Some(end) = x.1 {
+ bc_up = true;
+ counter_actions.insert(bracket_counter + 1, end);
+ }
+ out.push(x.0);
+ } else {
+ out.push(texmacro)
+ }
}
},
Err(_) => {
out.push(lex.slice());
+ bc_up = false;
}
}
}
@@ -78,9 +101,11 @@ mod tests {
#[test]
fn check_sanitization() {
let result = optimized_sanitize(
- r"\mkbibquote{Intention} und \mkbibquote{Intentionen sind \hyphen\ bibquote.}",
+ r"\mkbibquote{Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote.}",
);
- println!("{}", result);
- panic!("Tatütata!");
+ assert_eq!(
+ "\"Intention\" und \"Intentionen \"sind\" - bibquote.\"",
+ result
+ )
}
}
--
cgit v1.2.3
From d80ce65ad5efb64fcce313a4c44b7f46fc5e7798 Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Sat, 4 Oct 2025 12:30:22 +0200
Subject: macro-sani: skip the algorithm if no macro is in the string.
---
src/bibiman/sanitize/optimized_sanitize.rs | 94 +++++++++++++++++-------------
1 file changed, 53 insertions(+), 41 deletions(-)
(limited to 'src')
diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
index 8788d39..3a9dc67 100644
--- a/src/bibiman/sanitize/optimized_sanitize.rs
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -42,56 +42,68 @@ enum Token {
}
pub fn optimized_sanitize(input_text: &str) -> String {
- let mut out: Vec<&str> = Vec::with_capacity(input_text.chars().count());
- let mut bracket_counter: u32 = 0;
- let mut bc_up: bool = false;
- let mut counter_actions: HashMap = HashMap::new();
- let mut lex = Token::lexer(input_text);
- while let Some(sometoken) = lex.next() {
- match sometoken {
- Ok(token) => match token {
- Token::ForcedSpace => {
- out.push(" ");
- bc_up = false;
- }
- Token::OpenCurlyBracket => {
- if bc_up {
- bracket_counter = bracket_counter.saturating_add(1);
- } else {
- out.push("{")
+ let mut char_counter: usize = 0;
+ let mut contains_macro: bool = false;
+ for char in input_text.chars() {
+ if char == '\\' {
+ contains_macro = true;
+ }
+ char_counter = char_counter.saturating_add(1);
+ }
+ if !contains_macro {
+ input_text.to_string()
+ } else {
+ let mut out: Vec<&str> = Vec::with_capacity(char_counter);
+ let mut bracket_counter: u32 = 0;
+ let mut bc_up: bool = false;
+ let mut counter_actions: HashMap = HashMap::new();
+ let mut lex = Token::lexer(input_text);
+ while let Some(sometoken) = lex.next() {
+ match sometoken {
+ Ok(token) => match token {
+ Token::ForcedSpace => {
+ out.push(" ");
+ bc_up = false;
}
- }
- Token::ClosedCurlyBracket => {
- if bracket_counter == 0 {
- out.push("}")
- } else {
- match counter_actions.remove(&bracket_counter) {
- None => out.push("}"),
- Some(a) => out.push(a),
+ Token::OpenCurlyBracket => {
+ if bc_up {
+ bracket_counter = bracket_counter.saturating_add(1);
+ } else {
+ out.push("{")
}
- bracket_counter = bracket_counter - 1;
}
- }
- Token::LaTeXMacro => {
- let texmacro = lex.slice();
- if let Some(x) = LOOKUP.get(&texmacro) {
- if let Some(end) = x.1 {
- bc_up = true;
- counter_actions.insert(bracket_counter + 1, end);
+ Token::ClosedCurlyBracket => {
+ if bracket_counter == 0 {
+ out.push("}")
+ } else {
+ match counter_actions.remove(&bracket_counter) {
+ None => out.push("}"),
+ Some(a) => out.push(a),
+ }
+ bracket_counter = bracket_counter - 1;
+ }
+ }
+ Token::LaTeXMacro => {
+ let texmacro = lex.slice();
+ if let Some(x) = LOOKUP.get(&texmacro) {
+ if let Some(end) = x.1 {
+ bc_up = true;
+ counter_actions.insert(bracket_counter + 1, end);
+ }
+ out.push(x.0);
+ } else {
+ out.push(texmacro)
}
- out.push(x.0);
- } else {
- out.push(texmacro)
}
+ },
+ Err(_) => {
+ out.push(lex.slice());
+ bc_up = false;
}
- },
- Err(_) => {
- out.push(lex.slice());
- bc_up = false;
}
}
+ out.into_iter().collect::()
}
- out.into_iter().collect::()
}
#[cfg(test)]
--
cgit v1.2.3
From f5adcd0fad71828646b5047c661a0d8524a3fc9c Mon Sep 17 00:00:00 2001
From: Klimperfix
Date: Sat, 4 Oct 2025 12:37:40 +0200
Subject: macro-sani: Fixed whitespace handling after latex macro.
---
src/bibiman/sanitize/optimized_sanitize.rs | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
(limited to 'src')
diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
index 3a9dc67..8ee0115 100644
--- a/src/bibiman/sanitize/optimized_sanitize.rs
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -35,7 +35,7 @@ enum Token {
OpenCurlyBracket,
#[token("}")]
ClosedCurlyBracket,
- #[regex(r"\\[\*\w]+")]
+ #[regex(r"\\[\*\w]+ ?")]
LaTeXMacro,
#[token(r"\ ")]
ForcedSpace,
@@ -85,7 +85,7 @@ pub fn optimized_sanitize(input_text: &str) -> String {
}
Token::LaTeXMacro => {
let texmacro = lex.slice();
- if let Some(x) = LOOKUP.get(&texmacro) {
+ if let Some(x) = LOOKUP.get(&texmacro.trim_end()) {
if let Some(end) = x.1 {
bc_up = true;
counter_actions.insert(bracket_counter + 1, end);
@@ -113,10 +113,10 @@ mod tests {
#[test]
fn check_sanitization() {
let result = optimized_sanitize(
- r"\mkbibquote{Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote.}",
+ r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}",
);
assert_eq!(
- "\"Intention\" und \"Intentionen \"sind\" - bibquote.\"",
+ "\"Intention\" und \"Intentionen \"sind\" - bibquote-.\"",
result
)
}
--
cgit v1.2.3
From 606716f064c1151ab9e8617ff76fd4b95f4a2c57 Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Sun, 5 Oct 2025 11:59:24 +0200
Subject: add functions to make sanitized data from PR #57 visible in the
information tab too
---
src/bibiman/bibisetup.rs | 22 +++++++++++++++++++---
src/tui/ui.rs | 6 +++---
2 files changed, 22 insertions(+), 6 deletions(-)
(limited to 'src')
diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs
index 37b0b01..b3f788c 100644
--- a/src/bibiman/bibisetup.rs
+++ b/src/bibiman/bibisetup.rs
@@ -173,7 +173,11 @@ impl BibiData {
}
pub fn title(&self) -> &str {
- &self.title
+ if let Some(sani_data) = &self.sanitized_bibi_data {
+ &sani_data.title
+ } else {
+ &self.title
+ }
}
pub fn year(&self) -> &str {
@@ -204,8 +208,20 @@ impl BibiData {
.collect_vec()
}
- pub fn subtitle(&self) -> &str {
- self.subtitle.as_ref().unwrap()
+ pub fn subtitle(&self) -> Option<&str> {
+ if let Some(sani_data) = &self.sanitized_bibi_data {
+ sani_data.subtitle.as_ref().map(|s| s.as_str())
+ } else {
+ self.subtitle.as_ref().map(|s| s.as_str())
+ }
+ }
+
+ pub fn get_abstract(&self) -> &str {
+ if let Some(sani_data) = &self.sanitized_bibi_data {
+ &sani_data.abstract_text
+ } else {
+ &self.abstract_text
+ }
}
fn create_symbols(&self, cfg: &BibiConfig) -> [Option; 3] {
diff --git a/src/tui/ui.rs b/src/tui/ui.rs
index 3e6e24c..87d8c29 100644
--- a/src/tui/ui.rs
+++ b/src/tui/ui.rs
@@ -894,7 +894,7 @@ pub fn render_selected_item(app: &mut App, cfg: &BibiConfig, frame: &mut Frame,
Style::new().fg(cfg.colors.author_color),
),
]));
- if cur_entry.subtitle.is_some() {
+ if let Some(subtitle) = cur_entry.subtitle() {
lines.push(Line::from(vec![
Span::styled("Title: ", style_value),
Span::styled(
@@ -910,7 +910,7 @@ pub fn render_selected_item(app: &mut App, cfg: &BibiConfig, frame: &mut Frame,
.add_modifier(Modifier::ITALIC),
),
Span::styled(
- cur_entry.subtitle(),
+ subtitle,
Style::new()
.fg(cfg.colors.title_color)
.add_modifier(Modifier::ITALIC),
@@ -999,7 +999,7 @@ pub fn render_selected_item(app: &mut App, cfg: &BibiConfig, frame: &mut Frame,
}
lines.push(Line::from(""));
lines.push(Line::from(vec![Span::styled(
- cur_entry.abstract_text.clone(),
+ cur_entry.get_abstract(),
Style::new().fg(cfg.colors.main_text_color),
)]));
lines
--
cgit v1.2.3
From f84ebacd1ea47b09c58dd1ef1eaaf70feaacbe0f Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Sun, 5 Oct 2025 13:16:26 +0200
Subject: add some further text macros to be hidden
---
src/bibiman/sanitize/optimized_sanitize.rs | 2 ++
tests/biblatex-test.bib | 6 +++---
2 files changed, 5 insertions(+), 3 deletions(-)
(limited to 'src')
diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
index 8ee0115..336cc56 100644
--- a/src/bibiman/sanitize/optimized_sanitize.rs
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -27,6 +27,8 @@ static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = ph
r"\hyphen" => ("-", None),
r"\textbf" => ("", Some("")),
r"\textit" => ("", Some("")),
+ r"\texttt" => ("", Some("")),
+ r"\textsc" => ("", Some("")),
};
#[derive(Logos, Debug)]
diff --git a/tests/biblatex-test.bib b/tests/biblatex-test.bib
index fcc5085..2149e7c 100644
--- a/tests/biblatex-test.bib
+++ b/tests/biblatex-test.bib
@@ -107,7 +107,7 @@
}
@mvbook{aristotle_rhetoric,
- title = {The Rhetoric of {Aristotle} with a commentary by the late {Edward
+ title = {The \textbf{Rhetoric} of {Aristotle} with a commentary by the late {Edward
Meredith Cope}},
shorttitle = {Rhetoric},
author = {Aristotle},
@@ -127,7 +127,7 @@
}
@book{augustine,
- title = {Heterogeneous catalysis for the synthetic chemist},
+ title = {Heterogeneous catalysis for the synthetic \textit{chemist}},
shorttitle = {Heterogeneous catalysis},
author = {Augustine, Robert L.},
location = {New York},
@@ -289,7 +289,7 @@
annotation = {An \texttt{article} entry with an \texttt{eid} and a \texttt{
doi} field. Note that the \textsc{doi} is transformed into a
clickable link if \texttt{hyperref} support has been enabled},
- abstract = {The computation of ionic solvation free energies from atomistic
+ abstract = {The computation of \texttt{ionic} solvation free energies from atomistic
simulations is a surprisingly difficult problem that has found no
satisfactory solution for more than 15 years. The reason is that
the charging free energies evaluated from such simulations are
--
cgit v1.2.3