aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorlukeflo2025-10-06 11:52:32 +0200
committerlukeflo2025-10-06 11:52:32 +0200
commitecc4e102a5e8203407ca30f4d7c0abb871fa7111 (patch)
tree5d859c8f9859e66507c717727805d86b4543294e /src
parentdb0deb9b6ee59c07ab2f04972184b154925034bd (diff)
parentf84ebacd1ea47b09c58dd1ef1eaaf70feaacbe0f (diff)
downloadbibiman-ecc4e102a5e8203407ca30f4d7c0abb871fa7111.tar.gz
bibiman-ecc4e102a5e8203407ca30f4d7c0abb871fa7111.zip
Merge pull request 'Parse LaTeX macros in entry fields' (#57) from Klimperfix/bibiman:impl-latex-macro-sanitizing into main
Reviewed-on: https://codeberg.org/lukeflo/bibiman/pulls/57 Reviewed-by: lukeflo <lukeflo@noreply.codeberg.org>
Diffstat (limited to 'src')
-rw-r--r--src/bibiman.rs3
-rw-r--r--src/bibiman/bibisetup.rs85
-rw-r--r--src/bibiman/entries.rs8
-rw-r--r--src/bibiman/sanitize.rs43
-rw-r--r--src/bibiman/sanitize/optimized_sanitize.rs125
-rw-r--r--src/bibiman/search.rs4
-rw-r--r--src/tui/ui.rs6
7 files changed, 252 insertions, 22 deletions
diff --git a/src/bibiman.rs b/src/bibiman.rs
index 6d21f8c..c423ce1 100644
--- a/src/bibiman.rs
+++ b/src/bibiman.rs
@@ -44,6 +44,9 @@ pub mod entries;
pub mod keywords;
pub mod search;
+/// Module with functions to sanitize text containing LaTeX macros into readable Unicode text.
+pub mod sanitize;
+
// Areas in which actions are possible
#[derive(Debug)]
pub enum CurrentArea {
diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs
index 3bcb717..b3f788c 100644
--- a/src/bibiman/bibisetup.rs
+++ b/src/bibiman/bibisetup.rs
@@ -26,6 +26,7 @@ use std::{fs, path::PathBuf};
use walkdir::WalkDir;
use crate::app;
+use crate::bibiman::sanitize::sanitize_one_bibidata;
use crate::cliargs::{self};
use crate::config::BibiConfig;
@@ -76,6 +77,18 @@ pub struct BibiData {
pub subtitle: Option<String>,
pub notes: Option<Vec<OsString>>,
pub symbols: [Option<String>; 3],
+ /// This field should be set to `None` when initially creating a `BibiData` instance.
+ /// It can then be generated from the constructed `BibiData` object using
+ /// `BibiData::gen_sanitized()`.
+ pub sanitized_bibi_data: Option<SanitizedBibiData>,
+}
+
+/// Sanitized title, subtitle and abstract text belonging to one `BibiData` entry.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct SanitizedBibiData {
+ pub title: String,
+ pub subtitle: Option<String>,
+ pub abstract_text: String,
}
#[derive(Debug, Clone, PartialEq)]
@@ -119,22 +132,38 @@ impl BibiData {
// self.pubtype(),
// &self.symbols,
// ]
-
- BibiRow {
- authors: {
- if self.short_author.is_empty() {
- self.authors()
- } else {
- &self.short_author
- }
- },
- title: self.title(),
- year: self.year(),
- custom_field_value: self.custom_field_value(),
- symbols: &self.symbols,
+ let author_ref = if self.short_author.is_empty() {
+ self.authors()
+ } else {
+ &self.short_author
+ };
+ if let Some(sanidata) = &self.sanitized_bibi_data {
+ BibiRow {
+ authors: author_ref,
+ title: &sanidata.title,
+ year: self.year(),
+ custom_field_value: self.custom_field_value(),
+ symbols: &self.symbols,
+ }
+ } else {
+ BibiRow {
+ authors: author_ref,
+ title: self.title(),
+ year: self.year(),
+ custom_field_value: self.custom_field_value(),
+ symbols: &self.symbols,
+ }
}
}
+    /// Fills in `sanitized_bibi_data` from the entry's current field values.
+    ///
+    /// Takes the entry by value and hands the same entry back with the
+    /// freshly sanitized data attached.
+    pub fn gen_sanitized(mut self) -> Self {
+        let sanitized = sanitize_one_bibidata(&self);
+        self.sanitized_bibi_data = Some(sanitized);
+        self
+    }
+
pub fn entry_id(&self) -> &u32 {
&self.id
}
@@ -144,7 +173,11 @@ impl BibiData {
}
pub fn title(&self) -> &str {
- &self.title
+ if let Some(sani_data) = &self.sanitized_bibi_data {
+ &sani_data.title
+ } else {
+ &self.title
+ }
}
pub fn year(&self) -> &str {
@@ -175,8 +208,20 @@ impl BibiData {
.collect_vec()
}
- pub fn subtitle(&self) -> &str {
- self.subtitle.as_ref().unwrap()
+    /// Returns the entry's subtitle, or `None` if it has none.
+    ///
+    /// When `sanitized_bibi_data` is set, the sanitized subtitle is returned;
+    /// otherwise the raw `subtitle` field is used.
+    pub fn subtitle(&self) -> Option<&str> {
+        match &self.sanitized_bibi_data {
+            Some(sani_data) => sani_data.subtitle.as_deref(),
+            None => self.subtitle.as_deref(),
+        }
+    }
+
+    /// Returns the abstract text, preferring the sanitized version when
+    /// `sanitized_bibi_data` is set and falling back to the raw field otherwise.
+    pub fn get_abstract(&self) -> &str {
+        match self.sanitized_bibi_data.as_ref() {
+            Some(sanitized) => &sanitized.abstract_text,
+            None => &self.abstract_text,
+        }
    }
fn create_symbols(&self, cfg: &BibiConfig) -> [Option<String>; 3] {
@@ -287,6 +332,12 @@ impl BibiSetup {
} else {
None
};
+ //
+ //
+ // bibiman sanitizes some fields at this point;
+ // this may increase startup load times.
+ //
+ //
citekeys
.iter()
.enumerate()
@@ -317,7 +368,9 @@ impl BibiSetup {
None
},
symbols: [None, None, None],
+ sanitized_bibi_data: None,
}
+ .gen_sanitized()
})
.collect()
}
diff --git a/src/bibiman/entries.rs b/src/bibiman/entries.rs
index db6d6bf..0b35a8b 100644
--- a/src/bibiman/entries.rs
+++ b/src/bibiman/entries.rs
@@ -174,7 +174,9 @@ mod tests {
subtitle: None,
notes: None,
symbols: [None, None, None],
- };
+ sanitized_bibi_data: None,
+ }
+ .gen_sanitized();
let entry_vec = BibiData::ref_vec(&mut entry, &cfg);
@@ -194,7 +196,9 @@ mod tests {
subtitle: None,
notes: None,
symbols: [None, None, None],
- };
+ sanitized_bibi_data: None,
+ }
+ .gen_sanitized();
let entry_vec_editors = BibiData::ref_vec(&mut entry_editors, &cfg);
diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs
new file mode 100644
index 0000000..9ccf4c4
--- /dev/null
+++ b/src/bibiman/sanitize.rs
@@ -0,0 +1,43 @@
+// bibiman - a TUI for managing BibLaTeX databases
+// Copyright (C) 2025 lukeflo
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+/////
+
+use crate::bibiman::bibisetup::{BibiData, SanitizedBibiData};
+
+mod optimized_sanitize;
+use optimized_sanitize::optimized_sanitize;
+
+/// Helper macro that builds a `SanitizedBibiData` from a `BibiData` value.
+///
+/// The title, the optional subtitle and the abstract text are each passed
+/// through `optimized_sanitize`; a missing subtitle stays `None`.
+macro_rules! optimized_sanitize_bibidata {
+    ($bibidata:expr) => {
+        SanitizedBibiData {
+            title: optimized_sanitize(&$bibidata.title),
+            subtitle: $bibidata.subtitle.as_deref().map(optimized_sanitize),
+            abstract_text: optimized_sanitize(&$bibidata.abstract_text),
+        }
+    };
+}
+
+/// Sanitizes one `BibiData` and returns the resulting `SanitizedBibiData`.
+///
+/// The result is always computed from the raw `title`, `subtitle` and
+/// `abstract_text` fields; any sanitized data already stored on `bibidata`
+/// is ignored.
+pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData {
+    optimized_sanitize_bibidata!(bibidata)
+}
diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
new file mode 100644
index 0000000..336cc56
--- /dev/null
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -0,0 +1,125 @@
+// bibiman - a TUI for managing BibLaTeX databases
+// Copyright (C) 2025 lukeflo
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+/////
+
+use phf::phf_map;
+use std::collections::HashMap;
+
+use logos::Logos;
+
+/// Value stored for each known macro: the text emitted in place of the macro
+/// itself and, for macros that take a braced argument, the text emitted in
+/// place of the `}` closing that argument.
+type Replacement = (&'static str, Option<&'static str>);
+
+/// Table of the LaTeX macros that get rewritten, keyed by the macro name
+/// including its leading backslash.
+static LOOKUP: phf::Map<&'static str, Replacement> = phf_map! {
+    r"\mkbibquote" => ("\"", Some("\"")),
+    r"\enquote*" => ("\'", Some("\'")),
+    r"\enquote" => ("\"", Some("\"")),
+    r"\hyphen" => ("-", None),
+    r"\textbf" => ("", Some("")),
+    r"\textit" => ("", Some("")),
+    r"\texttt" => ("", Some("")),
+    r"\textsc" => ("", Some("")),
+};
+
+/// Tokens the sanitizer's lexer recognises; everything else in the input is
+/// reported by the lexer as an error.
+#[derive(Logos, Debug)]
+enum Token {
+    /// A backslash directly followed by a space (`\ `).
+    #[token(r"\ ")]
+    ForcedSpace,
+    /// A backslash followed by one or more word characters or `*`, plus at
+    /// most one trailing space.
+    #[regex(r"\\[\*\w]+ ?")]
+    LaTeXMacro,
+    /// An opening `{`.
+    #[token("{")]
+    OpenCurlyBracket,
+    /// A closing `}`.
+    #[token("}")]
+    ClosedCurlyBracket,
+}
+
+/// Replaces the LaTeX macros listed in `LOOKUP` with their plain-text
+/// replacements and returns the resulting string.
+///
+/// * Input without any backslash is returned unchanged.
+/// * A known macro is replaced by its replacement text (an optional single
+///   space after the macro name is dropped). If the macro defines a closing
+///   text, the `{` that directly follows it is removed and the matching `}`
+///   is replaced by that closing text.
+/// * `\ ` (backslash + space) becomes a single space.
+/// * Unknown macros, braces that do not belong to a known macro, and all
+///   other text are copied through verbatim.
+pub fn optimized_sanitize(input_text: &str) -> String {
+    // Without a backslash there is no macro, and braces are only rewritten
+    // after a macro, so the text can be returned as is.
+    if !input_text.contains('\\') {
+        return input_text.to_string();
+    }
+
+    let mut out = String::with_capacity(input_text.len());
+    // Number of currently open braces, literal ones included, so that every
+    // `}` is paired with the `{` that actually opened it.
+    let mut depth: usize = 0;
+    // Closing text of the most recent known macro that still waits for its `{`.
+    let mut pending_closer: Option<&'static str> = None;
+    // Closing text to emit when the brace opened at a given depth is closed.
+    let mut closers: HashMap<usize, &'static str> = HashMap::new();
+
+    let mut lex = Token::lexer(input_text);
+    // `while let` instead of `for`: `lex.slice()` is needed inside the loop.
+    while let Some(sometoken) = lex.next() {
+        match sometoken {
+            Ok(Token::ForcedSpace) => {
+                out.push(' ');
+                pending_closer = None;
+            }
+            Ok(Token::OpenCurlyBracket) => {
+                depth += 1;
+                // `take()` clears the pending closer so that only the brace
+                // directly following the macro is treated as its argument.
+                match pending_closer.take() {
+                    Some(closer) => {
+                        closers.insert(depth, closer);
+                    }
+                    None => out.push('{'),
+                }
+            }
+            Ok(Token::ClosedCurlyBracket) => {
+                if depth == 0 {
+                    // Unbalanced closing brace: keep it verbatim.
+                    out.push('}');
+                } else {
+                    match closers.remove(&depth) {
+                        Some(closer) => out.push_str(closer),
+                        None => out.push('}'),
+                    }
+                    depth -= 1;
+                }
+            }
+            Ok(Token::LaTeXMacro) => {
+                let texmacro = lex.slice();
+                // The token may include one trailing space; the table keys do not.
+                if let Some((replacement, closer)) = LOOKUP.get(&texmacro.trim_end()) {
+                    if closer.is_some() {
+                        pending_closer = *closer;
+                    }
+                    out.push_str(replacement);
+                } else {
+                    out.push_str(texmacro);
+                }
+            }
+            // Anything the lexer does not recognise is ordinary text.
+            Err(_) => {
+                out.push_str(lex.slice());
+                pending_closer = None;
+            }
+        }
+    }
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use super::optimized_sanitize;
+
+    #[test]
+    fn check_sanitization() {
+        let result = optimized_sanitize(
+            r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}",
+        );
+        assert_eq!(
+            "\"Intention\" und \"Intentionen \"sind\" - bibquote-.\"",
+            result
+        )
+    }
+
+    #[test]
+    fn literal_braces_inside_macro_argument_stay_balanced() {
+        assert_eq!(
+            "\"a {b} c\"",
+            optimized_sanitize(r"\mkbibquote{a {b} c}")
+        );
+        assert_eq!("{A}", optimized_sanitize(r"\textbf{{A}}"));
+    }
+
+    #[test]
+    fn unknown_macros_and_plain_text_are_kept() {
+        assert_eq!(r"\emph{foo}", optimized_sanitize(r"\emph{foo}"));
+        assert_eq!("{plain}", optimized_sanitize("{plain}"));
+    }
+}
diff --git a/src/bibiman/search.rs b/src/bibiman/search.rs
index e0c5f17..2156634 100644
--- a/src/bibiman/search.rs
+++ b/src/bibiman/search.rs
@@ -141,7 +141,9 @@ mod tests {
subtitle: None,
notes: None,
symbols: [None, None, None],
- };
+ sanitized_bibi_data: None,
+ }
+ .gen_sanitized();
let joined_vec = BibiSearch::convert_to_string(&bibvec);
diff --git a/src/tui/ui.rs b/src/tui/ui.rs
index 3e6e24c..87d8c29 100644
--- a/src/tui/ui.rs
+++ b/src/tui/ui.rs
@@ -894,7 +894,7 @@ pub fn render_selected_item(app: &mut App, cfg: &BibiConfig, frame: &mut Frame,
Style::new().fg(cfg.colors.author_color),
),
]));
- if cur_entry.subtitle.is_some() {
+ if let Some(subtitle) = cur_entry.subtitle() {
lines.push(Line::from(vec![
Span::styled("Title: ", style_value),
Span::styled(
@@ -910,7 +910,7 @@ pub fn render_selected_item(app: &mut App, cfg: &BibiConfig, frame: &mut Frame,
.add_modifier(Modifier::ITALIC),
),
Span::styled(
- cur_entry.subtitle(),
+ subtitle,
Style::new()
.fg(cfg.colors.title_color)
.add_modifier(Modifier::ITALIC),
@@ -999,7 +999,7 @@ pub fn render_selected_item(app: &mut App, cfg: &BibiConfig, frame: &mut Frame,
}
lines.push(Line::from(""));
lines.push(Line::from(vec![Span::styled(
- cur_entry.abstract_text.clone(),
+ cur_entry.get_abstract(),
Style::new().fg(cfg.colors.main_text_color),
)]));
lines