From 7350c7382bda85618c3dae1d74cc8cbe7ddd4b9d Mon Sep 17 00:00:00 2001 From: Klimperfix Date: Tue, 30 Sep 2025 21:49:31 +0200 Subject: Implemented basic sanitizing. The default Regex crate does not support the required regex features, so I'm using the fancy_regex crate that does. --- src/bibiman.rs | 3 ++ src/bibiman/sanitize.rs | 73 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 src/bibiman/sanitize.rs (limited to 'src') diff --git a/src/bibiman.rs b/src/bibiman.rs index 6d21f8c..c423ce1 100644 --- a/src/bibiman.rs +++ b/src/bibiman.rs @@ -44,6 +44,9 @@ pub mod entries; pub mod keywords; pub mod search; +/// Module with function to sanitize text with LaTeX Macros into readable unicode text. +pub mod sanitize; + // Areas in which actions are possible #[derive(Debug)] pub enum CurrentArea { diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs new file mode 100644 index 0000000..aaf81ad --- /dev/null +++ b/src/bibiman/sanitize.rs @@ -0,0 +1,73 @@ +// bibiman - a TUI for managing BibLaTeX databases +// Copyright (C) 2025 lukeflo +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +///// + +use fancy_regex::Regex; +use unicodeit::replace as unicode_replace; + +/// Sanitizing process rules as regex cmds. +/// +/// Only macros that are not already covered by unicodeit should be processed in this way. 
+/// +// Regex to capture content between brackets: `(\{(?:[^{}]++|(\1))*+\})` +// Alternative without capturing the outer brackets: `\{((?:(\{(?:[^}{]|(\1))*+\})|[^{])*?)\}` +// +const SANITIZE_REGEX: &[(&str, &str)] = &[ + ( + r"\\mkbibquote\{((?:(\{(?:[^}{]|(\1))*+\})|[^{])*?)\}", + "\"${1}\"", + ), + (r"\\hyphen", "-"), +]; + +/// Sanitize one String with LaTeX Macros into a more readable one without. +/// +/// If one is going to mass-sanitize strings, one should use the [`sanitize`] +/// function for performance reasons instead, to process multiple strings at once. +/// +/// This is just a shortcut for the sanitize function. +pub fn sanitize_one(input_text: &str) -> String { + // This does not panic, the sanitize function always returns + // as many elements in the returned list as it get's elements + // in the input vector. + sanitize(vec![input_text]).get(0).unwrap().to_string() +} + +/// Sanitize multiple Strings with LaTeX Macros into more readable unicode equivalents. +/// +/// This function does always return the same amount of Strings as it gets in the input list. +pub fn sanitize(input_text: Vec<&str>) -> Vec { + let mut result: Vec = input_text.into_iter().map(|s| s.to_string()).collect(); + let mut regex: Vec<(Regex, &str)> = Vec::with_capacity(SANITIZE_REGEX.len()); + // build regex + for (search, replace) in SANITIZE_REGEX { + regex.push((Regex::new(search).unwrap(), replace)); + } + + // process strings + let result_len = result.len(); + for (re, replace) in regex { + for i in 0..result_len { + result[i] = re.replace_all(&result[i], replace).to_string(); + } + } + for i in 0..result_len { + result[i] = unicode_replace(&result[i]); + } + + // return result + result +} -- cgit v1.2.3 From dfb7edde13ca39af3e23b80e40272e02aa093919 Mon Sep 17 00:00:00 2001 From: Klimperfix Date: Fri, 3 Oct 2025 12:37:16 +0200 Subject: Sanitization hooked into bibiman. 
--- src/bibiman/bibisetup.rs | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs index 3bcb717..8466169 100644 --- a/src/bibiman/bibisetup.rs +++ b/src/bibiman/bibisetup.rs @@ -26,6 +26,7 @@ use std::{fs, path::PathBuf}; use walkdir::WalkDir; use crate::app; +use crate::bibiman::sanitize::sanitize_one; use crate::cliargs::{self}; use crate::config::BibiConfig; @@ -294,11 +295,28 @@ impl BibiSetup { let filepaths: (Option>, bool) = { Self::get_filepath(k, bibliography, &mut pdf_files) }; + // bibiman will sanitize some fields at this point, + // this may cause longer startup-load-times. + // + // It may be better to sanitize them somewhere else, so bibiman + // does not loose the original text-information including the + // LaTeX macros present in the bibfile. From here on, they will be + // gone. + // + // The following fields are going to be sanitized: + // + // - title + // - subtitle + // - abstract_text + // + // TODO: Once the final decision to implement the sanitization at + // this point, one could write a constructor for the BibiData struct + // which handles the sanitization. 
BibiData { id: i as u32, authors: Self::get_authors(k, bibliography), short_author: String::new(), - title: Self::get_title(k, bibliography), + title: sanitize_one(&Self::get_title(k, bibliography)), year: Self::get_year(k, bibliography), custom_field: ( cfg.general.custom_column.clone(), @@ -306,11 +324,14 @@ impl BibiSetup { ), keywords: Self::get_keywords(k, bibliography), citekey: k.to_owned(), - abstract_text: Self::get_abstract(k, bibliography), + abstract_text: sanitize_one(&Self::get_abstract(k, bibliography)), doi_url: Self::get_weblink(k, bibliography), filepath: filepaths.0, file_field: filepaths.1, - subtitle: Self::get_subtitle(k, bibliography), + subtitle: match Self::get_subtitle(k, bibliography) { + None => None, + Some(x) => Some(sanitize_one(&x)), + }, notes: if note_files.is_some() { Self::get_notepath(k, &mut note_files, &ext) } else { -- cgit v1.2.3 From 26befd38aedbfdd278c3096644baf69e4a1fb051 Mon Sep 17 00:00:00 2001 From: Klimperfix Date: Fri, 3 Oct 2025 16:56:30 +0200 Subject: Now storing the sanitized data separately, keeping the original. --- src/bibiman/bibisetup.rs | 154 ++++++++++++++++++++++++++--------------------- src/bibiman/entries.rs | 8 ++- src/bibiman/sanitize.rs | 96 +++++++++++++++++++++++------ src/bibiman/search.rs | 4 +- 4 files changed, 175 insertions(+), 87 deletions(-) (limited to 'src') diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs index 8466169..48046e9 100644 --- a/src/bibiman/bibisetup.rs +++ b/src/bibiman/bibisetup.rs @@ -26,7 +26,7 @@ use std::{fs, path::PathBuf}; use walkdir::WalkDir; use crate::app; -use crate::bibiman::sanitize::sanitize_one; +use crate::bibiman::sanitize::{mass_sanitize, sanitize_one_bibidata}; use crate::cliargs::{self}; use crate::config::BibiConfig; @@ -77,6 +77,18 @@ pub struct BibiData { pub subtitle: Option, pub notes: Option>, pub symbols: [Option; 3], + /// This field should be set to None when initially creating a BibiData instance. 
+ /// It then can be generated from the constructed BibiData Object using + /// `BibiData::gen_sanitized()` + pub sanitized_bibi_data: Option, +} + +/// Struct that holds sanitized bibidata data. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct SanitizedBibiData { + pub title: String, + pub subtitle: Option, + pub abstract_text: String, } #[derive(Debug, Clone, PartialEq)] @@ -120,22 +132,41 @@ impl BibiData { // self.pubtype(), // &self.symbols, // ] - - BibiRow { - authors: { - if self.short_author.is_empty() { - self.authors() - } else { - &self.short_author - } - }, - title: self.title(), - year: self.year(), - custom_field_value: self.custom_field_value(), - symbols: &self.symbols, + let author_ref = if self.short_author.is_empty() { + self.authors() + } else { + &self.short_author + }; + if let Some(sanidata) = &self.sanitized_bibi_data { + BibiRow { + authors: author_ref, + title: &sanidata.title, + year: self.year(), + custom_field_value: self.custom_field_value(), + symbols: &self.symbols, + } + } else { + BibiRow { + authors: author_ref, + title: self.title(), + year: self.year(), + custom_field_value: self.custom_field_value(), + symbols: &self.symbols, + } } } + /// Generates the SanitizedBibiData for the BibiData. + /// + /// Consumes self and returns a new BibiData struct. + /// + /// If multiple SanitizedBibiData are to be generated, + /// one should use the [`mass_sanitize`] function instead. + pub fn gen_sanitized(mut self) -> Self { + self.sanitized_bibi_data = Some(sanitize_one_bibidata(&self)); + self + } + pub fn entry_id(&self) -> &u32 { &self.id } @@ -288,59 +319,48 @@ impl BibiSetup { } else { None }; - citekeys - .iter() - .enumerate() - .map(|(i, k)| { - let filepaths: (Option>, bool) = - { Self::get_filepath(k, bibliography, &mut pdf_files) }; - - // bibiman will sanitize some fields at this point, - // this may cause longer startup-load-times. 
- // - // It may be better to sanitize them somewhere else, so bibiman - // does not loose the original text-information including the - // LaTeX macros present in the bibfile. From here on, they will be - // gone. - // - // The following fields are going to be sanitized: - // - // - title - // - subtitle - // - abstract_text - // - // TODO: Once the final decision to implement the sanitization at - // this point, one could write a constructor for the BibiData struct - // which handles the sanitization. - BibiData { - id: i as u32, - authors: Self::get_authors(k, bibliography), - short_author: String::new(), - title: sanitize_one(&Self::get_title(k, bibliography)), - year: Self::get_year(k, bibliography), - custom_field: ( - cfg.general.custom_column.clone(), - Self::get_custom_field(k, bibliography, &cfg.general.custom_column), - ), - keywords: Self::get_keywords(k, bibliography), - citekey: k.to_owned(), - abstract_text: sanitize_one(&Self::get_abstract(k, bibliography)), - doi_url: Self::get_weblink(k, bibliography), - filepath: filepaths.0, - file_field: filepaths.1, - subtitle: match Self::get_subtitle(k, bibliography) { - None => None, - Some(x) => Some(sanitize_one(&x)), - }, - notes: if note_files.is_some() { - Self::get_notepath(k, &mut note_files, &ext) - } else { - None - }, - symbols: [None, None, None], - } - }) - .collect() + // + // + // bibiman will sanitize some fields at this point, + // this may cause longer startup-load-times. 
+ // + // + mass_sanitize( + citekeys + .iter() + .enumerate() + .map(|(i, k)| { + let filepaths: (Option>, bool) = + { Self::get_filepath(k, bibliography, &mut pdf_files) }; + + BibiData { + id: i as u32, + authors: Self::get_authors(k, bibliography), + short_author: String::new(), + title: Self::get_title(k, bibliography), + year: Self::get_year(k, bibliography), + custom_field: ( + cfg.general.custom_column.clone(), + Self::get_custom_field(k, bibliography, &cfg.general.custom_column), + ), + keywords: Self::get_keywords(k, bibliography), + citekey: k.to_owned(), + abstract_text: Self::get_abstract(k, bibliography), + doi_url: Self::get_weblink(k, bibliography), + filepath: filepaths.0, + file_field: filepaths.1, + subtitle: Self::get_subtitle(k, bibliography), + notes: if note_files.is_some() { + Self::get_notepath(k, &mut note_files, &ext) + } else { + None + }, + symbols: [None, None, None], + sanitized_bibi_data: None, + } + }) + .collect(), + ) } // get list of citekeys from the given bibfile diff --git a/src/bibiman/entries.rs b/src/bibiman/entries.rs index db6d6bf..0b35a8b 100644 --- a/src/bibiman/entries.rs +++ b/src/bibiman/entries.rs @@ -174,7 +174,9 @@ mod tests { subtitle: None, notes: None, symbols: [None, None, None], - }; + sanitized_bibi_data: None, + } + .gen_sanitized(); let entry_vec = BibiData::ref_vec(&mut entry, &cfg); @@ -194,7 +196,9 @@ mod tests { subtitle: None, notes: None, symbols: [None, None, None], - }; + sanitized_bibi_data: None, + } + .gen_sanitized(); let entry_vec_editors = BibiData::ref_vec(&mut entry_editors, &cfg); diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs index aaf81ad..614ed11 100644 --- a/src/bibiman/sanitize.rs +++ b/src/bibiman/sanitize.rs @@ -18,6 +18,8 @@ use fancy_regex::Regex; use unicodeit::replace as unicode_replace; +use crate::bibiman::bibisetup::{BibiData, SanitizedBibiData}; + /// Sanitizing process rules as regex cmds. 
/// /// Only macros that are not already covered by unicodeit should be processed in this way. @@ -33,6 +35,71 @@ const SANITIZE_REGEX: &[(&str, &str)] = &[ (r"\\hyphen", "-"), ]; +/// Function to build the sanitization regex vector: +fn regex_vector() -> Vec<(Regex, &'static str)> { + let mut regex: Vec<(Regex, &str)> = Vec::with_capacity(SANITIZE_REGEX.len()); + // build regex + for (search, replace) in SANITIZE_REGEX { + regex.push((Regex::new(search).unwrap(), replace)); + } + regex +} + +fn optimized_sanitize(input_text: Vec<&str>, regex: &Vec<(Regex, &str)>) -> Vec { + let mut result: Vec = input_text.into_iter().map(|s| s.to_string()).collect(); + + // process strings + let result_len = result.len(); + for (re, replace) in regex { + for i in 0..result_len { + result[i] = re.replace_all(&result[i], *replace).to_string(); + } + } + for i in 0..result_len { + result[i] = unicode_replace(&result[i]); + } + + // return result + result +} + +/// Helper macro to sanitize bibidata structs. +/// Here lives the code that generates SanitizedBibiData +/// structs from BibiData structs. +macro_rules! optimized_sanitize_bibidata { + ($bibidata:expr, $regex:expr) => { + match &$bibidata.subtitle { + None => { + let sanitized_data = + optimized_sanitize(vec![&$bibidata.title, &$bibidata.abstract_text], &$regex); + SanitizedBibiData { + title: sanitized_data[0].clone(), + subtitle: None, + abstract_text: sanitized_data[1].clone(), + } + } + Some(subtitle) => { + let sanitized_data = optimized_sanitize( + vec![&$bibidata.title, subtitle, &$bibidata.abstract_text], + &$regex, + ); + SanitizedBibiData { + title: sanitized_data[0].clone(), + subtitle: Some(sanitized_data[1].clone()), + abstract_text: sanitized_data[2].clone(), + } + } + } + }; +} + +/// Sanitize one BibiData and return a SanitizedBibiData struct. +/// This function does ignore any existing sanitization of the bibidata struct. 
+pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData { + let regex = regex_vector(); + optimized_sanitize_bibidata!(bibidata, regex) +} + /// Sanitize one String with LaTeX Macros into a more readable one without. /// /// If one is going to mass-sanitize strings, one should use the [`sanitize`] @@ -50,24 +117,19 @@ pub fn sanitize_one(input_text: &str) -> String { /// /// This function does always return the same amount of Strings as it gets in the input list. pub fn sanitize(input_text: Vec<&str>) -> Vec { - let mut result: Vec = input_text.into_iter().map(|s| s.to_string()).collect(); - let mut regex: Vec<(Regex, &str)> = Vec::with_capacity(SANITIZE_REGEX.len()); - // build regex - for (search, replace) in SANITIZE_REGEX { - regex.push((Regex::new(search).unwrap(), replace)); - } + optimized_sanitize(input_text, ®ex_vector()) +} - // process strings - let result_len = result.len(); - for (re, replace) in regex { - for i in 0..result_len { - result[i] = re.replace_all(&result[i], replace).to_string(); - } - } - for i in 0..result_len { - result[i] = unicode_replace(&result[i]); - } +/// Sanitize a whole `Vec`, returning a new sanitized one. 
+pub fn mass_sanitize(bibidata: Vec) -> Vec { + let regex: Vec<(Regex, &str)> = regex_vector(); - // return result + let mut result: Vec = Vec::with_capacity(bibidata.len()); + for entry in bibidata { + result.push(BibiData { + sanitized_bibi_data: Some(optimized_sanitize_bibidata!(entry, regex)), + ..entry + }); + } result } diff --git a/src/bibiman/search.rs b/src/bibiman/search.rs index e0c5f17..2156634 100644 --- a/src/bibiman/search.rs +++ b/src/bibiman/search.rs @@ -141,7 +141,9 @@ mod tests { subtitle: None, notes: None, symbols: [None, None, None], - }; + sanitized_bibi_data: None, + } + .gen_sanitized(); let joined_vec = BibiSearch::convert_to_string(&bibvec); -- cgit v1.2.3 From 161fc7010cb863e1af534ce1d173136401816a32 Mon Sep 17 00:00:00 2001 From: Klimperfix Date: Fri, 3 Oct 2025 18:19:25 +0200 Subject: Removed unused sanitization functions. --- src/bibiman/sanitize.rs | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'src') diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs index 614ed11..823b91c 100644 --- a/src/bibiman/sanitize.rs +++ b/src/bibiman/sanitize.rs @@ -100,26 +100,6 @@ pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData { optimized_sanitize_bibidata!(bibidata, regex) } -/// Sanitize one String with LaTeX Macros into a more readable one without. -/// -/// If one is going to mass-sanitize strings, one should use the [`sanitize`] -/// function for performance reasons instead, to process multiple strings at once. -/// -/// This is just a shortcut for the sanitize function. -pub fn sanitize_one(input_text: &str) -> String { - // This does not panic, the sanitize function always returns - // as many elements in the returned list as it get's elements - // in the input vector. - sanitize(vec![input_text]).get(0).unwrap().to_string() -} - -/// Sanitize multiple Strings with LaTeX Macros into more readable unicode equivalents. 
-/// -/// This function does always return the same amount of Strings as it gets in the input list. -pub fn sanitize(input_text: Vec<&str>) -> Vec { - optimized_sanitize(input_text, ®ex_vector()) -} - /// Sanitize a whole `Vec`, returning a new sanitized one. pub fn mass_sanitize(bibidata: Vec) -> Vec { let regex: Vec<(Regex, &str)> = regex_vector(); -- cgit v1.2.3 From 624977bb9fd209b0c7c5f60a1332718de1d460d4 Mon Sep 17 00:00:00 2001 From: Klimperfix Date: Fri, 3 Oct 2025 22:57:37 +0200 Subject: macro-sani: started impl new algorithm --- Cargo.lock | 146 +++++++++++++++++++++-------- Cargo.toml | 4 +- src/bibiman/bibisetup.rs | 76 +++++++-------- src/bibiman/sanitize.rs | 94 +++---------------- src/bibiman/sanitize/optimized_sanitize.rs | 86 +++++++++++++++++ 5 files changed, 241 insertions(+), 165 deletions(-) create mode 100644 src/bibiman/sanitize/optimized_sanitize.rs (limited to 'src') diff --git a/Cargo.lock b/Cargo.lock index 535b929..22a5a48 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,6 +89,12 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + [[package]] name = "bibiman" version = "0.14.1" @@ -99,13 +105,14 @@ dependencies = [ "crossterm", "dirs", "editor-command", - "fancy-regex", "figment", "futures", "itertools", "lexopt", + "logos", "nucleo-matcher", "owo-colors", + "phf", "rand", "ratatui", "regex", @@ -114,7 +121,6 @@ dependencies = [ "tokio", "tokio-util", "tui-input", - "unicodeit", "ureq", "walkdir", ] @@ -132,21 +138,6 @@ dependencies = [ "unscanny", ] -[[package]] -name = "bit-set" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" -dependencies = [ - "bit-vec", -] - -[[package]] -name = "bit-vec" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" - [[package]] name = "bitflags" version = "1.3.2" @@ -426,17 +417,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "fancy-regex" -version = "0.16.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "998b056554fbe42e03ae0e152895cd1a7e1002aec800fdc6635d20270260c46f" -dependencies = [ - "bit-set", - "regex-automata", - "regex-syntax", -] - [[package]] name = "fastrand" version = "2.3.0" @@ -893,6 +873,40 @@ version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +[[package]] +name = "logos" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff472f899b4ec2d99161c51f60ff7075eeb3097069a36050d8037a6325eb8154" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "192a3a2b90b0c05b27a0b2c43eecdb7c415e29243acc3f89cc8247a5b693045c" +dependencies = [ + "beef", + "fnv", + "lazy_static", + "proc-macro2", + "quote", + "regex-syntax", + "rustc_version", + "syn", +] + +[[package]] +name = "logos-derive" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "605d9697bcd5ef3a42d38efc51541aa3d6a4a25f7ab6d1ed0da5ac632a26b470" +dependencies = [ + "logos-codegen", +] + [[package]] name = "lru" version = "0.12.5" @@ -1126,6 +1140,49 @@ dependencies = [ "indexmap", ] +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_macros", + "phf_shared", + "serde", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared", +] + +[[package]] +name = "phf_macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -1329,6 +1386,15 @@ version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f" +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustix" version = "0.38.44" @@ -1417,6 +1483,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + [[package]] name = "serde" version = "1.0.219" @@ -1503,6 +1575,12 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "slab" version = "0.4.10" @@ -1891,18 +1969,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" -[[package]] -name = "unicodeit" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1069c222ea63347e2e59763aa12d32c9c6a4e595931c7724a769f6a75bfbc553" -dependencies = [ - "aho-corasick", - "cfg-if", - "memchr", - "regex", -] - [[package]] name = "unscanny" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 2d596de..a01a7e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,5 +39,5 @@ ureq = "2.12.1" serde = { version = "1.0.217", features = ["serde_derive"] } figment = { version = "0.10.19", features = [ "toml", "test" ]} owo-colors = "4.2.2" -unicodeit = { version = "0.2.0", features = ["naive-impl"] } -fancy-regex = "0.16.2" +logos = "0.15.1" +phf = { version = "0.13.1", features = ["macros"] } diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs index 48046e9..37b0b01 100644 --- a/src/bibiman/bibisetup.rs +++ b/src/bibiman/bibisetup.rs @@ -26,7 +26,7 @@ use std::{fs, path::PathBuf}; use walkdir::WalkDir; use crate::app; -use crate::bibiman::sanitize::{mass_sanitize, sanitize_one_bibidata}; +use crate::bibiman::sanitize::sanitize_one_bibidata; use crate::cliargs::{self}; use crate::config::BibiConfig; @@ -159,9 +159,6 @@ impl BibiData { /// Generates the SanitizedBibiData for the BibiData. /// /// Consumes self and returns a new BibiData struct. - /// - /// If multiple SanitizedBibiData are to be generated, - /// one should use the [`mass_sanitize`] function instead. 
pub fn gen_sanitized(mut self) -> Self { self.sanitized_bibi_data = Some(sanitize_one_bibidata(&self)); self @@ -325,42 +322,41 @@ impl BibiSetup { // this may cause longer startup-load-times. // // - mass_sanitize( - citekeys - .iter() - .enumerate() - .map(|(i, k)| { - let filepaths: (Option>, bool) = - { Self::get_filepath(k, bibliography, &mut pdf_files) }; - - BibiData { - id: i as u32, - authors: Self::get_authors(k, bibliography), - short_author: String::new(), - title: Self::get_title(k, bibliography), - year: Self::get_year(k, bibliography), - custom_field: ( - cfg.general.custom_column.clone(), - Self::get_custom_field(k, bibliography, &cfg.general.custom_column), - ), - keywords: Self::get_keywords(k, bibliography), - citekey: k.to_owned(), - abstract_text: Self::get_abstract(k, bibliography), - doi_url: Self::get_weblink(k, bibliography), - filepath: filepaths.0, - file_field: filepaths.1, - subtitle: Self::get_subtitle(k, bibliography), - notes: if note_files.is_some() { - Self::get_notepath(k, &mut note_files, &ext) - } else { - None - }, - symbols: [None, None, None], - sanitized_bibi_data: None, - } - }) - .collect(), - ) + citekeys + .iter() + .enumerate() + .map(|(i, k)| { + let filepaths: (Option>, bool) = + { Self::get_filepath(k, bibliography, &mut pdf_files) }; + + BibiData { + id: i as u32, + authors: Self::get_authors(k, bibliography), + short_author: String::new(), + title: Self::get_title(k, bibliography), + year: Self::get_year(k, bibliography), + custom_field: ( + cfg.general.custom_column.clone(), + Self::get_custom_field(k, bibliography, &cfg.general.custom_column), + ), + keywords: Self::get_keywords(k, bibliography), + citekey: k.to_owned(), + abstract_text: Self::get_abstract(k, bibliography), + doi_url: Self::get_weblink(k, bibliography), + filepath: filepaths.0, + file_field: filepaths.1, + subtitle: Self::get_subtitle(k, bibliography), + notes: if note_files.is_some() { + Self::get_notepath(k, &mut note_files, &ext) + } else { + 
None + }, + symbols: [None, None, None], + sanitized_bibi_data: None, + } + .gen_sanitized() + }) + .collect() } // get list of citekeys from the given bibfile diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs index 823b91c..9ccf4c4 100644 --- a/src/bibiman/sanitize.rs +++ b/src/bibiman/sanitize.rs @@ -15,80 +15,23 @@ // along with this program. If not, see . ///// -use fancy_regex::Regex; -use unicodeit::replace as unicode_replace; - use crate::bibiman::bibisetup::{BibiData, SanitizedBibiData}; -/// Sanitizing process rules as regex cmds. -/// -/// Only macros that are not already covered by unicodeit should be processed in this way. -/// -// Regex to capture content between brackets: `(\{(?:[^{}]++|(\1))*+\})` -// Alternative without capturing the outer brackets: `\{((?:(\{(?:[^}{]|(\1))*+\})|[^{])*?)\}` -// -const SANITIZE_REGEX: &[(&str, &str)] = &[ - ( - r"\\mkbibquote\{((?:(\{(?:[^}{]|(\1))*+\})|[^{])*?)\}", - "\"${1}\"", - ), - (r"\\hyphen", "-"), -]; - -/// Function to build the sanitization regex vector: -fn regex_vector() -> Vec<(Regex, &'static str)> { - let mut regex: Vec<(Regex, &str)> = Vec::with_capacity(SANITIZE_REGEX.len()); - // build regex - for (search, replace) in SANITIZE_REGEX { - regex.push((Regex::new(search).unwrap(), replace)); - } - regex -} - -fn optimized_sanitize(input_text: Vec<&str>, regex: &Vec<(Regex, &str)>) -> Vec { - let mut result: Vec = input_text.into_iter().map(|s| s.to_string()).collect(); - - // process strings - let result_len = result.len(); - for (re, replace) in regex { - for i in 0..result_len { - result[i] = re.replace_all(&result[i], *replace).to_string(); - } - } - for i in 0..result_len { - result[i] = unicode_replace(&result[i]); - } - - // return result - result -} +mod optimized_sanitize; +use optimized_sanitize::optimized_sanitize; /// Helper macro to sanitize bibidata structs. /// Here lives the code that generates SanitizedBibiData /// structs from BibiData structs. macro_rules! 
optimized_sanitize_bibidata { - ($bibidata:expr, $regex:expr) => { - match &$bibidata.subtitle { - None => { - let sanitized_data = - optimized_sanitize(vec![&$bibidata.title, &$bibidata.abstract_text], &$regex); - SanitizedBibiData { - title: sanitized_data[0].clone(), - subtitle: None, - abstract_text: sanitized_data[1].clone(), - } - } - Some(subtitle) => { - let sanitized_data = optimized_sanitize( - vec![&$bibidata.title, subtitle, &$bibidata.abstract_text], - &$regex, - ); - SanitizedBibiData { - title: sanitized_data[0].clone(), - subtitle: Some(sanitized_data[1].clone()), - abstract_text: sanitized_data[2].clone(), - } - } + ($bibidata:expr) => { + SanitizedBibiData { + title: optimized_sanitize(&$bibidata.title), + subtitle: match &$bibidata.subtitle { + None => None, + Some(subtitle) => Some(optimized_sanitize(subtitle)), + }, + abstract_text: optimized_sanitize(&$bibidata.abstract_text), } }; } @@ -96,20 +39,5 @@ macro_rules! optimized_sanitize_bibidata { /// Sanitize one BibiData and return a SanitizedBibiData struct. /// This function does ignore any existing sanitization of the bibidata struct. pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData { - let regex = regex_vector(); - optimized_sanitize_bibidata!(bibidata, regex) -} - -/// Sanitize a whole `Vec`, returning a new sanitized one. 
-pub fn mass_sanitize(bibidata: Vec) -> Vec { - let regex: Vec<(Regex, &str)> = regex_vector(); - - let mut result: Vec = Vec::with_capacity(bibidata.len()); - for entry in bibidata { - result.push(BibiData { - sanitized_bibi_data: Some(optimized_sanitize_bibidata!(entry, regex)), - ..entry - }); - } - result + optimized_sanitize_bibidata!(bibidata) } diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs new file mode 100644 index 0000000..b3bf90d --- /dev/null +++ b/src/bibiman/sanitize/optimized_sanitize.rs @@ -0,0 +1,86 @@ +// bibiman - a TUI for managing BibLaTeX databases +// Copyright (C) 2025 lukeflo +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +///// + +use phf::phf_map; +use std::collections::HashMap; + +use logos::Logos; + +static LOOKUP: phf::Map<&'static str, &'static str> = phf_map! { + " " => " ", // str a forced space should substitute to. 
+}; + +#[derive(Logos, Debug)] +enum Token { + #[token("{")] + OpenCurlyBracket, + #[token("}")] + ClosedCurlyBracket, + #[regex(r"\\\w+")] + LaTeXMacro, + #[token(r"\ ")] + ForcedSpace, +} + +pub fn optimized_sanitize(input_text: &str) -> String { + let mut out: Vec<&str> = Vec::new(); + let mut bracket_counter: u32 = 0; + let mut counter_actions: HashMap = HashMap::new(); + let mut lex = Token::lexer(input_text); + while let Some(sometoken) = lex.next() { + match sometoken { + Ok(token) => match token { + Token::ForcedSpace => { + out.push( + LOOKUP + .get(" ") + .expect("Something is wrong with the sanitization lookup table."), + ); + } + Token::OpenCurlyBracket => { + bracket_counter.saturating_add(1); + todo!(); + } + Token::ClosedCurlyBracket => { + bracket_counter.saturating_sub(1); + todo!(); + } + Token::LaTeXMacro => { + todo!() + } + }, + Err(_) => { + out.push(lex.slice()); + } + } + } + out.into_iter().collect::() +} + +#[cfg(test)] +mod tests { + use super::optimized_sanitize; + + #[test] + fn check_sanitization() { + let result = optimized_sanitize( + r"\mkbibquote{Intention} und \mkbibquote{Intentionen sind \hyphen\ bibquote.}", + ); + println!("{}", result); + panic!("Tatütata!"); + } +} -- cgit v1.2.3 From 3ba8f024577e52c51833cd34b07ad90d14cb6338 Mon Sep 17 00:00:00 2001 From: Klimperfix Date: Sat, 4 Oct 2025 12:00:11 +0200 Subject: macro-sani: Implemented new algorithm to replace macros. --- src/bibiman/sanitize/optimized_sanitize.rs | 61 +++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs index b3bf90d..8788d39 100644 --- a/src/bibiman/sanitize/optimized_sanitize.rs +++ b/src/bibiman/sanitize/optimized_sanitize.rs @@ -20,8 +20,13 @@ use std::collections::HashMap; use logos::Logos; -static LOOKUP: phf::Map<&'static str, &'static str> = phf_map! 
{ - " " => " ", // str a forced space should substitute to. +static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = phf_map! { + r"\mkbibquote" => ("\"", Some("\"")), + r"\enquote*" => ("\'", Some("\'")), + r"\enquote" => ("\"", Some("\"")), + r"\hyphen" => ("-", None), + r"\textbf" => ("", Some("")), + r"\textit" => ("", Some("")), }; #[derive(Logos, Debug)] @@ -30,41 +35,59 @@ enum Token { OpenCurlyBracket, #[token("}")] ClosedCurlyBracket, - #[regex(r"\\\w+")] + #[regex(r"\\[\*\w]+")] LaTeXMacro, #[token(r"\ ")] ForcedSpace, } pub fn optimized_sanitize(input_text: &str) -> String { - let mut out: Vec<&str> = Vec::new(); + let mut out: Vec<&str> = Vec::with_capacity(input_text.chars().count()); let mut bracket_counter: u32 = 0; - let mut counter_actions: HashMap = HashMap::new(); + let mut bc_up: bool = false; + let mut counter_actions: HashMap = HashMap::new(); let mut lex = Token::lexer(input_text); while let Some(sometoken) = lex.next() { match sometoken { Ok(token) => match token { Token::ForcedSpace => { - out.push( - LOOKUP - .get(" ") - .expect("Something is wrong with the sanitization lookup table."), - ); + out.push(" "); + bc_up = false; } Token::OpenCurlyBracket => { - bracket_counter.saturating_add(1); - todo!(); + if bc_up { + bracket_counter = bracket_counter.saturating_add(1); + } else { + out.push("{") + } } Token::ClosedCurlyBracket => { - bracket_counter.saturating_sub(1); - todo!(); + if bracket_counter == 0 { + out.push("}") + } else { + match counter_actions.remove(&bracket_counter) { + None => out.push("}"), + Some(a) => out.push(a), + } + bracket_counter = bracket_counter - 1; + } } Token::LaTeXMacro => { - todo!() + let texmacro = lex.slice(); + if let Some(x) = LOOKUP.get(&texmacro) { + if let Some(end) = x.1 { + bc_up = true; + counter_actions.insert(bracket_counter + 1, end); + } + out.push(x.0); + } else { + out.push(texmacro) + } } }, Err(_) => { out.push(lex.slice()); + bc_up = false; } } } @@ -78,9 +101,11 @@ 
mod tests { #[test] fn check_sanitization() { let result = optimized_sanitize( - r"\mkbibquote{Intention} und \mkbibquote{Intentionen sind \hyphen\ bibquote.}", + r"\mkbibquote{Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote.}", ); - println!("{}", result); - panic!("Tatütata!"); + assert_eq!( + "\"Intention\" und \"Intentionen \"sind\" - bibquote.\"", + result + ) } } -- cgit v1.2.3 From d80ce65ad5efb64fcce313a4c44b7f46fc5e7798 Mon Sep 17 00:00:00 2001 From: Klimperfix Date: Sat, 4 Oct 2025 12:30:22 +0200 Subject: macro-sani: skipping the algorithm, if no macro is in the string. --- src/bibiman/sanitize/optimized_sanitize.rs | 94 +++++++++++++++++------------- 1 file changed, 53 insertions(+), 41 deletions(-) (limited to 'src') diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs index 8788d39..3a9dc67 100644 --- a/src/bibiman/sanitize/optimized_sanitize.rs +++ b/src/bibiman/sanitize/optimized_sanitize.rs @@ -42,56 +42,68 @@ enum Token { } pub fn optimized_sanitize(input_text: &str) -> String { - let mut out: Vec<&str> = Vec::with_capacity(input_text.chars().count()); - let mut bracket_counter: u32 = 0; - let mut bc_up: bool = false; - let mut counter_actions: HashMap = HashMap::new(); - let mut lex = Token::lexer(input_text); - while let Some(sometoken) = lex.next() { - match sometoken { - Ok(token) => match token { - Token::ForcedSpace => { - out.push(" "); - bc_up = false; - } - Token::OpenCurlyBracket => { - if bc_up { - bracket_counter = bracket_counter.saturating_add(1); - } else { - out.push("{") + let mut char_counter: usize = 0; + let mut contains_macro: bool = false; + for char in input_text.chars() { + if char == '\\' { + contains_macro = true; + } + char_counter = char_counter.saturating_add(1); + } + if !contains_macro { + input_text.to_string() + } else { + let mut out: Vec<&str> = Vec::with_capacity(char_counter); + let mut bracket_counter: u32 = 0; + let mut bc_up: bool = 
false; + let mut counter_actions: HashMap = HashMap::new(); + let mut lex = Token::lexer(input_text); + while let Some(sometoken) = lex.next() { + match sometoken { + Ok(token) => match token { + Token::ForcedSpace => { + out.push(" "); + bc_up = false; } - } - Token::ClosedCurlyBracket => { - if bracket_counter == 0 { - out.push("}") - } else { - match counter_actions.remove(&bracket_counter) { - None => out.push("}"), - Some(a) => out.push(a), + Token::OpenCurlyBracket => { + if bc_up { + bracket_counter = bracket_counter.saturating_add(1); + } else { + out.push("{") } - bracket_counter = bracket_counter - 1; } - } - Token::LaTeXMacro => { - let texmacro = lex.slice(); - if let Some(x) = LOOKUP.get(&texmacro) { - if let Some(end) = x.1 { - bc_up = true; - counter_actions.insert(bracket_counter + 1, end); + Token::ClosedCurlyBracket => { + if bracket_counter == 0 { + out.push("}") + } else { + match counter_actions.remove(&bracket_counter) { + None => out.push("}"), + Some(a) => out.push(a), + } + bracket_counter = bracket_counter - 1; + } + } + Token::LaTeXMacro => { + let texmacro = lex.slice(); + if let Some(x) = LOOKUP.get(&texmacro) { + if let Some(end) = x.1 { + bc_up = true; + counter_actions.insert(bracket_counter + 1, end); + } + out.push(x.0); + } else { + out.push(texmacro) } - out.push(x.0); - } else { - out.push(texmacro) } + }, + Err(_) => { + out.push(lex.slice()); + bc_up = false; } - }, - Err(_) => { - out.push(lex.slice()); - bc_up = false; } } + out.into_iter().collect::() } - out.into_iter().collect::() } #[cfg(test)] -- cgit v1.2.3 From f5adcd0fad71828646b5047c661a0d8524a3fc9c Mon Sep 17 00:00:00 2001 From: Klimperfix Date: Sat, 4 Oct 2025 12:37:40 +0200 Subject: macro-sani: Fixed whitespace handling after latex macro. 
--- src/bibiman/sanitize/optimized_sanitize.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs index 3a9dc67..8ee0115 100644 --- a/src/bibiman/sanitize/optimized_sanitize.rs +++ b/src/bibiman/sanitize/optimized_sanitize.rs @@ -35,7 +35,7 @@ enum Token { OpenCurlyBracket, #[token("}")] ClosedCurlyBracket, - #[regex(r"\\[\*\w]+")] + #[regex(r"\\[\*\w]+ ?")] LaTeXMacro, #[token(r"\ ")] ForcedSpace, @@ -85,7 +85,7 @@ pub fn optimized_sanitize(input_text: &str) -> String { } Token::LaTeXMacro => { let texmacro = lex.slice(); - if let Some(x) = LOOKUP.get(&texmacro) { + if let Some(x) = LOOKUP.get(&texmacro.trim_end()) { if let Some(end) = x.1 { bc_up = true; counter_actions.insert(bracket_counter + 1, end); @@ -113,10 +113,10 @@ mod tests { #[test] fn check_sanitization() { let result = optimized_sanitize( - r"\mkbibquote{Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote.}", + r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}", ); assert_eq!( - "\"Intention\" und \"Intentionen \"sind\" - bibquote.\"", + "\"Intention\" und \"Intentionen \"sind\" - bibquote-.\"", result ) } -- cgit v1.2.3 From 606716f064c1151ab9e8617ff76fd4b95f4a2c57 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Sun, 5 Oct 2025 11:59:24 +0200 Subject: add functions to make sanitized data from PR #57 visible in the information tab too --- src/bibiman/bibisetup.rs | 22 +++++++++++++++++++--- src/tui/ui.rs | 6 +++--- 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs index 37b0b01..b3f788c 100644 --- a/src/bibiman/bibisetup.rs +++ b/src/bibiman/bibisetup.rs @@ -173,7 +173,11 @@ impl BibiData { } pub fn title(&self) -> &str { - &self.title + if let Some(sani_data) = &self.sanitized_bibi_data { + &sani_data.title + } else { + 
&self.title + } } pub fn year(&self) -> &str { @@ -204,8 +208,20 @@ impl BibiData { .collect_vec() } - pub fn subtitle(&self) -> &str { - self.subtitle.as_ref().unwrap() + pub fn subtitle(&self) -> Option<&str> { + if let Some(sani_data) = &self.sanitized_bibi_data { + sani_data.subtitle.as_ref().map(|s| s.as_str()) + } else { + self.subtitle.as_ref().map(|s| s.as_str()) + } + } + + pub fn get_abstract(&self) -> &str { + if let Some(sani_data) = &self.sanitized_bibi_data { + &sani_data.abstract_text + } else { + &self.abstract_text + } } fn create_symbols(&self, cfg: &BibiConfig) -> [Option; 3] { diff --git a/src/tui/ui.rs b/src/tui/ui.rs index 3e6e24c..87d8c29 100644 --- a/src/tui/ui.rs +++ b/src/tui/ui.rs @@ -894,7 +894,7 @@ pub fn render_selected_item(app: &mut App, cfg: &BibiConfig, frame: &mut Frame, Style::new().fg(cfg.colors.author_color), ), ])); - if cur_entry.subtitle.is_some() { + if let Some(subtitle) = cur_entry.subtitle() { lines.push(Line::from(vec![ Span::styled("Title: ", style_value), Span::styled( @@ -910,7 +910,7 @@ pub fn render_selected_item(app: &mut App, cfg: &BibiConfig, frame: &mut Frame, .add_modifier(Modifier::ITALIC), ), Span::styled( - cur_entry.subtitle(), + subtitle, Style::new() .fg(cfg.colors.title_color) .add_modifier(Modifier::ITALIC), @@ -999,7 +999,7 @@ pub fn render_selected_item(app: &mut App, cfg: &BibiConfig, frame: &mut Frame, } lines.push(Line::from("")); lines.push(Line::from(vec![Span::styled( - cur_entry.abstract_text.clone(), + cur_entry.get_abstract(), Style::new().fg(cfg.colors.main_text_color), )])); lines -- cgit v1.2.3 From f84ebacd1ea47b09c58dd1ef1eaaf70feaacbe0f Mon Sep 17 00:00:00 2001 From: lukeflo Date: Sun, 5 Oct 2025 13:16:26 +0200 Subject: add some further text macros to be hidden --- src/bibiman/sanitize/optimized_sanitize.rs | 2 ++ tests/biblatex-test.bib | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/bibiman/sanitize/optimized_sanitize.rs 
b/src/bibiman/sanitize/optimized_sanitize.rs index 8ee0115..336cc56 100644 --- a/src/bibiman/sanitize/optimized_sanitize.rs +++ b/src/bibiman/sanitize/optimized_sanitize.rs @@ -27,6 +27,8 @@ static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = ph r"\hyphen" => ("-", None), r"\textbf" => ("", Some("")), r"\textit" => ("", Some("")), + r"\texttt" => ("", Some("")), + r"\textsc" => ("", Some("")), }; #[derive(Logos, Debug)] diff --git a/tests/biblatex-test.bib b/tests/biblatex-test.bib index fcc5085..2149e7c 100644 --- a/tests/biblatex-test.bib +++ b/tests/biblatex-test.bib @@ -107,7 +107,7 @@ } @mvbook{aristotle_rhetoric, - title = {The Rhetoric of {Aristotle} with a commentary by the late {Edward + title = {The \textbf{Rhetoric} of {Aristotle} with a commentary by the late {Edward Meredith Cope}}, shorttitle = {Rhetoric}, author = {Aristotle}, @@ -127,7 +127,7 @@ } @book{augustine, - title = {Heterogeneous catalysis for the synthetic chemist}, + title = {Heterogeneous catalysis for the synthetic \textit{chemist}}, shorttitle = {Heterogeneous catalysis}, author = {Augustine, Robert L.}, location = {New York}, @@ -289,7 +289,7 @@ annotation = {An \texttt{article} entry with an \texttt{eid} and a \texttt{ doi} field. Note that the \textsc{doi} is transformed into a clickable link if \texttt{hyperref} support has been enabled}, - abstract = {The computation of ionic solvation free energies from atomistic + abstract = {The computation of \texttt{ionic} solvation free energies from atomistic simulations is a surprisingly difficult problem that has found no satisfactory solution for more than 15 years. The reason is that the charging free energies evaluated from such simulations are -- cgit v1.2.3