aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlukeflo2025-10-06 11:52:32 +0200
committerlukeflo2025-10-06 11:52:32 +0200
commitecc4e102a5e8203407ca30f4d7c0abb871fa7111 (patch)
tree5d859c8f9859e66507c717727805d86b4543294e
parentdb0deb9b6ee59c07ab2f04972184b154925034bd (diff)
parentf84ebacd1ea47b09c58dd1ef1eaaf70feaacbe0f (diff)
downloadbibiman-ecc4e102a5e8203407ca30f4d7c0abb871fa7111.tar.gz
bibiman-ecc4e102a5e8203407ca30f4d7c0abb871fa7111.zip
Merge pull request 'Parse LaTeX macros in entry fields' (#57) from Klimperfix/bibiman:impl-latex-macro-sanitizing into main
Reviewed-on: https://codeberg.org/lukeflo/bibiman/pulls/57 Reviewed-by: lukeflo <lukeflo@noreply.codeberg.org>
-rw-r--r--Cargo.lock106
-rw-r--r--Cargo.toml2
-rw-r--r--src/bibiman.rs3
-rw-r--r--src/bibiman/bibisetup.rs85
-rw-r--r--src/bibiman/entries.rs8
-rw-r--r--src/bibiman/sanitize.rs43
-rw-r--r--src/bibiman/sanitize/optimized_sanitize.rs125
-rw-r--r--src/bibiman/search.rs4
-rw-r--r--src/tui/ui.rs6
-rw-r--r--tests/biblatex-test.bib19
10 files changed, 372 insertions, 29 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 3661e99..22a5a48 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -90,6 +90,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
+name = "beef"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
+
+[[package]]
name = "bibiman"
version = "0.14.1"
dependencies = [
@@ -103,8 +109,10 @@ dependencies = [
"futures",
"itertools",
"lexopt",
+ "logos",
"nucleo-matcher",
"owo-colors",
+ "phf",
"rand",
"ratatui",
"regex",
@@ -866,6 +874,40 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
+name = "logos"
+version = "0.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff472f899b4ec2d99161c51f60ff7075eeb3097069a36050d8037a6325eb8154"
+dependencies = [
+ "logos-derive",
+]
+
+[[package]]
+name = "logos-codegen"
+version = "0.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "192a3a2b90b0c05b27a0b2c43eecdb7c415e29243acc3f89cc8247a5b693045c"
+dependencies = [
+ "beef",
+ "fnv",
+ "lazy_static",
+ "proc-macro2",
+ "quote",
+ "regex-syntax",
+ "rustc_version",
+ "syn",
+]
+
+[[package]]
+name = "logos-derive"
+version = "0.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "605d9697bcd5ef3a42d38efc51541aa3d6a4a25f7ab6d1ed0da5ac632a26b470"
+dependencies = [
+ "logos-codegen",
+]
+
+[[package]]
name = "lru"
version = "0.12.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1099,6 +1141,49 @@ dependencies = [
]
[[package]]
+name = "phf"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
+dependencies = [
+ "phf_macros",
+ "phf_shared",
+ "serde",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737"
+dependencies = [
+ "fastrand",
+ "phf_shared",
+]
+
+[[package]]
+name = "phf_macros"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef"
+dependencies = [
+ "phf_generator",
+ "phf_shared",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266"
+dependencies = [
+ "siphasher",
+]
+
+[[package]]
name = "pin-project-lite"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1302,6 +1387,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
[[package]]
+name = "rustc_version"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
+dependencies = [
+ "semver",
+]
+
+[[package]]
name = "rustix"
version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1390,6 +1484,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
+name = "semver"
+version = "1.0.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2"
+
+[[package]]
name = "serde"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1476,6 +1576,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
[[package]]
+name = "siphasher"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
+
+[[package]]
name = "slab"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index b3fc774..a01a7e7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -39,3 +39,5 @@ ureq = "2.12.1"
serde = { version = "1.0.217", features = ["serde_derive"] }
figment = { version = "0.10.19", features = [ "toml", "test" ]}
owo-colors = "4.2.2"
+logos = "0.15.1"
+phf = { version = "0.13.1", features = ["macros"] }
diff --git a/src/bibiman.rs b/src/bibiman.rs
index 6d21f8c..c423ce1 100644
--- a/src/bibiman.rs
+++ b/src/bibiman.rs
@@ -44,6 +44,9 @@ pub mod entries;
pub mod keywords;
pub mod search;
+/// Module with functions to sanitize text containing LaTeX macros into readable Unicode text.
+pub mod sanitize;
+
// Areas in which actions are possible
#[derive(Debug)]
pub enum CurrentArea {
diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs
index 3bcb717..b3f788c 100644
--- a/src/bibiman/bibisetup.rs
+++ b/src/bibiman/bibisetup.rs
@@ -26,6 +26,7 @@ use std::{fs, path::PathBuf};
use walkdir::WalkDir;
use crate::app;
+use crate::bibiman::sanitize::sanitize_one_bibidata;
use crate::cliargs::{self};
use crate::config::BibiConfig;
@@ -76,6 +77,18 @@ pub struct BibiData {
pub subtitle: Option<String>,
pub notes: Option<Vec<OsString>>,
pub symbols: [Option<String>; 3],
+ /// This field should be set to None when initially creating a BibiData instance.
+ /// It then can be generated from the constructed BibiData Object using
+ /// `BibiData::gen_sanitized()`
+ pub sanitized_bibi_data: Option<SanitizedBibiData>,
+}
+
+/// Sanitized copies of the `BibiData` text fields in which LaTeX macros
+/// are replaced by readable text.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct SanitizedBibiData {
+ /// Sanitized version of the entry's `title` field.
+ pub title: String,
+ /// Sanitized version of the entry's `subtitle` field; `None` when the
+ /// entry has no subtitle.
+ pub subtitle: Option<String>,
+ /// Sanitized version of the entry's `abstract_text` field.
+ pub abstract_text: String,
}
#[derive(Debug, Clone, PartialEq)]
@@ -119,22 +132,38 @@ impl BibiData {
// self.pubtype(),
// &self.symbols,
// ]
-
- BibiRow {
- authors: {
- if self.short_author.is_empty() {
- self.authors()
- } else {
- &self.short_author
- }
- },
- title: self.title(),
- year: self.year(),
- custom_field_value: self.custom_field_value(),
- symbols: &self.symbols,
+ let author_ref = if self.short_author.is_empty() {
+ self.authors()
+ } else {
+ &self.short_author
+ };
+ if let Some(sanidata) = &self.sanitized_bibi_data {
+ BibiRow {
+ authors: author_ref,
+ title: &sanidata.title,
+ year: self.year(),
+ custom_field_value: self.custom_field_value(),
+ symbols: &self.symbols,
+ }
+ } else {
+ BibiRow {
+ authors: author_ref,
+ title: self.title(),
+ year: self.year(),
+ custom_field_value: self.custom_field_value(),
+ symbols: &self.symbols,
+ }
}
}
+ /// Populates `sanitized_bibi_data` from the entry's current field values.
+ ///
+ /// Consumes `self` and returns the same entry with `sanitized_bibi_data`
+ /// set to `Some(..)`. Sanitized data that was stored before is replaced.
+ #[must_use]
+ pub fn gen_sanitized(mut self) -> Self {
+     let sanitized = sanitize_one_bibidata(&self);
+     self.sanitized_bibi_data = Some(sanitized);
+     self
+ }
+
pub fn entry_id(&self) -> &u32 {
&self.id
}
@@ -144,7 +173,11 @@ impl BibiData {
}
pub fn title(&self) -> &str {
- &self.title
+ if let Some(sani_data) = &self.sanitized_bibi_data {
+ &sani_data.title
+ } else {
+ &self.title
+ }
}
pub fn year(&self) -> &str {
@@ -175,8 +208,20 @@ impl BibiData {
.collect_vec()
}
- pub fn subtitle(&self) -> &str {
- self.subtitle.as_ref().unwrap()
+ /// Returns the subtitle for display, if the entry has one.
+ ///
+ /// The sanitized subtitle is used when `sanitized_bibi_data` is set,
+ /// otherwise the raw `subtitle` field.
+ pub fn subtitle(&self) -> Option<&str> {
+     match &self.sanitized_bibi_data {
+         Some(sani_data) => sani_data.subtitle.as_deref(),
+         None => self.subtitle.as_deref(),
+     }
+ }
+
+ /// Returns the abstract for display.
+ ///
+ /// The sanitized abstract is used when `sanitized_bibi_data` is set,
+ /// otherwise the raw `abstract_text` field.
+ pub fn get_abstract(&self) -> &str {
+     match &self.sanitized_bibi_data {
+         Some(sani_data) => &sani_data.abstract_text,
+         None => &self.abstract_text,
+     }
+ }
fn create_symbols(&self, cfg: &BibiConfig) -> [Option<String>; 3] {
@@ -287,6 +332,12 @@ impl BibiSetup {
} else {
None
};
+ //
+ //
+ // bibiman will sanitize some fields at this point,
+ // this may cause longer startup-load-times.
+ //
+ //
citekeys
.iter()
.enumerate()
@@ -317,7 +368,9 @@ impl BibiSetup {
None
},
symbols: [None, None, None],
+ sanitized_bibi_data: None,
}
+ .gen_sanitized()
})
.collect()
}
diff --git a/src/bibiman/entries.rs b/src/bibiman/entries.rs
index db6d6bf..0b35a8b 100644
--- a/src/bibiman/entries.rs
+++ b/src/bibiman/entries.rs
@@ -174,7 +174,9 @@ mod tests {
subtitle: None,
notes: None,
symbols: [None, None, None],
- };
+ sanitized_bibi_data: None,
+ }
+ .gen_sanitized();
let entry_vec = BibiData::ref_vec(&mut entry, &cfg);
@@ -194,7 +196,9 @@ mod tests {
subtitle: None,
notes: None,
symbols: [None, None, None],
- };
+ sanitized_bibi_data: None,
+ }
+ .gen_sanitized();
let entry_vec_editors = BibiData::ref_vec(&mut entry_editors, &cfg);
diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs
new file mode 100644
index 0000000..9ccf4c4
--- /dev/null
+++ b/src/bibiman/sanitize.rs
@@ -0,0 +1,43 @@
+// bibiman - a TUI for managing BibLaTeX databases
+// Copyright (C) 2025 lukeflo
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+/////
+
+use crate::bibiman::bibisetup::{BibiData, SanitizedBibiData};
+
+mod optimized_sanitize;
+use optimized_sanitize::optimized_sanitize;
+
+/// Helper macro that builds a `SanitizedBibiData` from a `BibiData`.
+///
+/// Each field is read directly from the given expression and passed
+/// through `optimized_sanitize`; a missing subtitle stays `None`.
+macro_rules! optimized_sanitize_bibidata {
+    ($bibidata:expr) => {
+        SanitizedBibiData {
+            title: optimized_sanitize(&$bibidata.title),
+            subtitle: $bibidata.subtitle.as_deref().map(optimized_sanitize),
+            abstract_text: optimized_sanitize(&$bibidata.abstract_text),
+        }
+    };
+}
+
+/// Sanitizes one `BibiData` and returns the result as a `SanitizedBibiData`.
+///
+/// The raw `title`, `subtitle` and `abstract_text` fields are read directly,
+/// so sanitized data already stored in `bibidata` is ignored.
+pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData {
+ optimized_sanitize_bibidata!(bibidata)
+}
diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
new file mode 100644
index 0000000..336cc56
--- /dev/null
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -0,0 +1,125 @@
+// bibiman - a TUI for managing BibLaTeX databases
+// Copyright (C) 2025 lukeflo
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+/////
+
+use phf::phf_map;
+use std::collections::HashMap;
+
+use logos::Logos;
+
+/// Replacement table for the LaTeX macros the sanitizer knows.
+///
+/// The key is the macro name including its leading backslash. The value is
+/// a pair: the first element is the text written in place of the macro
+/// itself; the second, when present, is the text written in place of the
+/// `}` that ends the macro's braced argument. `None` marks a macro that is
+/// replaced without looking for an argument.
+static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = phf_map! {
+ r"\mkbibquote" => ("\"", Some("\"")),
+ r"\enquote*" => ("\'", Some("\'")),
+ r"\enquote" => ("\"", Some("\"")),
+ r"\hyphen" => ("-", None),
+ r"\textbf" => ("", Some("")),
+ r"\textit" => ("", Some("")),
+ r"\texttt" => ("", Some("")),
+ r"\textsc" => ("", Some("")),
+};
+
+/// Tokens the sanitizer's lexer recognises.
+///
+/// Input that matches none of these patterns is reported by the lexer as an
+/// error; `optimized_sanitize` copies such input to the output unchanged.
+#[derive(Logos, Debug)]
+enum Token {
+ /// A literal `{`.
+ #[token("{")]
+ OpenCurlyBracket,
+ /// A literal `}`.
+ #[token("}")]
+ ClosedCurlyBracket,
+ /// A backslash followed by one or more word characters or `*`,
+ /// including one optional trailing space.
+ #[regex(r"\\[\*\w]+ ?")]
+ LaTeXMacro,
+ /// A backslash followed by a space (`\ `).
+ #[token(r"\ ")]
+ ForcedSpace,
+}
+
+/// Replaces the LaTeX macros listed in `LOOKUP` with readable text.
+///
+/// * Text without any backslash is returned unchanged.
+/// * A known macro is replaced by its opening text; if it has a closing
+///   text and is directly followed by `{` (one space in between is
+///   allowed), that brace pair is dropped and the closing text is written
+///   where the matching `}` was.
+/// * Unknown macros, braces that do not belong to a known macro, and all
+///   other text are copied to the output verbatim.
+/// * `\ ` (forced space) becomes a plain space.
+pub fn optimized_sanitize(input_text: &str) -> String {
+    // Fast path: without a backslash there is nothing to replace.
+    if !input_text.contains('\\') {
+        return input_text.to_string();
+    }
+
+    let mut out = String::with_capacity(input_text.len());
+    // Number of currently open `{` groups. Literal groups are counted as
+    // well, so that a `}` is always matched against the `{` that opened it.
+    let mut depth: usize = 0;
+    // Closing text of a known macro that is still waiting for its `{`.
+    // It is one-shot: the next token either consumes or discards it.
+    let mut pending_close: Option<&'static str> = None;
+    // Closing text to write when the group opened at a given depth ends.
+    // Depths without an entry belong to literal groups.
+    let mut closers: HashMap<usize, &'static str> = HashMap::new();
+
+    let mut lex = Token::lexer(input_text);
+    while let Some(sometoken) = lex.next() {
+        match sometoken {
+            Ok(Token::ForcedSpace) => {
+                out.push(' ');
+                pending_close = None;
+            }
+            Ok(Token::OpenCurlyBracket) => {
+                depth += 1;
+                if let Some(end) = pending_close.take() {
+                    // Argument brace of a known macro: swallow it and
+                    // remember what to write at the matching `}`.
+                    closers.insert(depth, end);
+                } else {
+                    out.push('{');
+                }
+            }
+            Ok(Token::ClosedCurlyBracket) => {
+                if depth == 0 {
+                    // Unbalanced `}`: keep it as it is.
+                    out.push('}');
+                } else {
+                    out.push_str(closers.remove(&depth).unwrap_or("}"));
+                    depth -= 1;
+                }
+            }
+            Ok(Token::LaTeXMacro) => {
+                let texmacro = lex.slice();
+                // The lexer may include one trailing space in the slice.
+                if let Some(x) = LOOKUP.get(&texmacro.trim_end()) {
+                    pending_close = x.1;
+                    out.push_str(x.0);
+                } else {
+                    pending_close = None;
+                    out.push_str(texmacro);
+                }
+            }
+            Err(_) => {
+                // Anything the lexer does not recognise is ordinary text.
+                out.push_str(lex.slice());
+                pending_close = None;
+            }
+        }
+    }
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use super::optimized_sanitize;
+
+    #[test]
+    fn check_sanitization() {
+        let result = optimized_sanitize(
+            r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}",
+        );
+        assert_eq!(
+            "\"Intention\" und \"Intentionen \"sind\" - bibquote-.\"",
+            result
+        )
+    }
+
+    #[test]
+    fn text_without_backslash_is_unchanged() {
+        assert_eq!("plain {text}", optimized_sanitize("plain {text}"));
+    }
+
+    #[test]
+    fn literal_group_inside_macro_argument_is_kept() {
+        assert_eq!(
+            "\"a {b} c\"",
+            optimized_sanitize(r"\mkbibquote{a {b} c}")
+        );
+    }
+
+    #[test]
+    fn doubled_braces_stay_balanced() {
+        assert_eq!("{Rhetoric}", optimized_sanitize(r"\textbf{{Rhetoric}}"));
+    }
+
+    #[test]
+    fn literal_group_after_macro_is_kept() {
+        assert_eq!(
+            "The Rhetoric of {Aristotle}",
+            optimized_sanitize(r"The \textbf{Rhetoric} of {Aristotle}")
+        );
+    }
+
+    #[test]
+    fn unknown_macro_is_left_verbatim() {
+        assert_eq!(r"\foo{bar}", optimized_sanitize(r"\foo{bar}"));
+    }
+}
diff --git a/src/bibiman/search.rs b/src/bibiman/search.rs
index e0c5f17..2156634 100644
--- a/src/bibiman/search.rs
+++ b/src/bibiman/search.rs
@@ -141,7 +141,9 @@ mod tests {
subtitle: None,
notes: None,
symbols: [None, None, None],
- };
+ sanitized_bibi_data: None,
+ }
+ .gen_sanitized();
let joined_vec = BibiSearch::convert_to_string(&bibvec);
diff --git a/src/tui/ui.rs b/src/tui/ui.rs
index 3e6e24c..87d8c29 100644
--- a/src/tui/ui.rs
+++ b/src/tui/ui.rs
@@ -894,7 +894,7 @@ pub fn render_selected_item(app: &mut App, cfg: &BibiConfig, frame: &mut Frame,
Style::new().fg(cfg.colors.author_color),
),
]));
- if cur_entry.subtitle.is_some() {
+ if let Some(subtitle) = cur_entry.subtitle() {
lines.push(Line::from(vec![
Span::styled("Title: ", style_value),
Span::styled(
@@ -910,7 +910,7 @@ pub fn render_selected_item(app: &mut App, cfg: &BibiConfig, frame: &mut Frame,
.add_modifier(Modifier::ITALIC),
),
Span::styled(
- cur_entry.subtitle(),
+ subtitle,
Style::new()
.fg(cfg.colors.title_color)
.add_modifier(Modifier::ITALIC),
@@ -999,7 +999,7 @@ pub fn render_selected_item(app: &mut App, cfg: &BibiConfig, frame: &mut Frame,
}
lines.push(Line::from(""));
lines.push(Line::from(vec![Span::styled(
- cur_entry.abstract_text.clone(),
+ cur_entry.get_abstract(),
Style::new().fg(cfg.colors.main_text_color),
)]));
lines
diff --git a/tests/biblatex-test.bib b/tests/biblatex-test.bib
index cfcdc80..2149e7c 100644
--- a/tests/biblatex-test.bib
+++ b/tests/biblatex-test.bib
@@ -28,9 +28,9 @@
@article{aksin,
title = {Effect of immobilization on catalytic characteristics of saturated
{Pd-N}-heterocyclic carbenes in {Mizoroki-Heck} reactions},
- author = {Aks{\i}n, {\"O}zge and T{\"u}rkmen, Hayati and Artok, Levent and {
- \c{C}}etinkaya, Bekir and Ni, Chaoying and B{\"u}y{\"u}kg{\"u}ng{\"
- o}r, Orhan and {\"O}zkal, Erhan},
+ author = {Aks{\i}n, {\"O}zge and T{\"u}rkmen, Hayati and Artok , Levent and
+ { \c{C}}etinkaya, Bekir and Ni, Chaoying and B{\" u}y{ \"u}kg{\"u}
+ ng{ \" o}r, Orhan and {\"O}zkal, Erhan},
volume = {691},
number = {13},
pages = {3027--3036},
@@ -82,7 +82,12 @@
langidopts = {variant=american},
file = {~/Documents/coding/projects/bibiman/tests/aristotle_physics.pdf},
annotation = {A \texttt{book} entry with a \texttt{translator} field},
- abstract = {The Physics is a work by Aristotle dedicated to the study of nature. Regarded by Heidegger as "the fundamental work of Western philosophy", it presents the renowned distinction between the four types of cause, as well as reflections on chance, motion, infinity, and other fundamental concepts. It is here that Aristotle sets out his celebrated paradox of time.},
+ abstract = {The Physics is a work by Aristotle dedicated to the study of
+ nature. Regarded by Heidegger as "the fundamental work of Western
+ philosophy", it presents the renowned distinction between the
+ four types of cause, as well as reflections on chance, motion,
+ infinity, and other fundamental concepts. It is here that
+ Aristotle sets out his celebrated paradox of time.},
}
@book{aristotle_poetics,
@@ -102,7 +107,7 @@
}
@mvbook{aristotle_rhetoric,
- title = {The Rhetoric of {Aristotle} with a commentary by the late {Edward
+ title = {The \textbf{Rhetoric} of {Aristotle} with a commentary by the late {Edward
Meredith Cope}},
shorttitle = {Rhetoric},
author = {Aristotle},
@@ -122,7 +127,7 @@
}
@book{augustine,
- title = {Heterogeneous catalysis for the synthetic chemist},
+ title = {Heterogeneous catalysis for the synthetic \textit{chemist}},
shorttitle = {Heterogeneous catalysis},
author = {Augustine, Robert L.},
location = {New York},
@@ -284,7 +289,7 @@
annotation = {An \texttt{article} entry with an \texttt{eid} and a \texttt{
doi} field. Note that the \textsc{doi} is transformed into a
clickable link if \texttt{hyperref} support has been enabled},
- abstract = {The computation of ionic solvation free energies from atomistic
+ abstract = {The computation of \texttt{ionic} solvation free energies from atomistic
simulations is a surprisingly difficult problem that has found no
satisfactory solution for more than 15 years. The reason is that
the charging free energies evaluated from such simulations are