aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlukeflo2025-10-08 13:49:06 +0200
committerlukeflo2025-10-08 13:49:06 +0200
commita07359a9a1da0c06c040f77158be31b3883b33ac (patch)
tree78317d775f5d62d7e48663bdb75794aae17b5ea4
parent67afd67d4d51a00079269d431a7058fc50750886 (diff)
downloadbibiman-a07359a9a1da0c06c040f77158be31b3883b33ac.tar.gz
bibiman-a07359a9a1da0c06c040f77158be31b3883b33ac.zip
refine matching and preformatting of fields for citekey formatting; add case field and enum
-rw-r--r--Cross.toml6
-rw-r--r--src/bibiman/citekeys.rs136
-rw-r--r--src/bibiman/sanitize.rs10
-rw-r--r--src/bibiman/sanitize/optimized_sanitize.rs28
-rw-r--r--src/config.rs24
5 files changed, 177 insertions, 27 deletions
diff --git a/Cross.toml b/Cross.toml
index e7cd27b..6140bf2 100644
--- a/Cross.toml
+++ b/Cross.toml
@@ -9,3 +9,9 @@ pre-build = [
"dpkg --add-architecture $CROSS_DEB_ARCH",
"apt-get update && apt-get install --assume-yes libssl-dev:$CROSS_DEB_ARCH",
]
+
+[target.x86_64-unknown-freebsd]
+# pre-build = [
+# "dpkg --add-architecture $CROSS_DEB_ARCH",
+# "apt-get update && apt-get install --assume-yes libssl-dev:$CROSS_DEB_ARCH",
+# ]
diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index 4c36e80..a304e92 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -1,13 +1,40 @@
-use biblatex::Bibliography;
+// bibiman - a TUI for managing BibLaTeX databases
+// Copyright (C) 2025 lukeflo
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+/////
+
+use biblatex::{Bibliography, ChunksExt, Entry, Type};
use color_eyre::eyre::eyre;
use owo_colors::OwoColorize;
+use serde::{Deserialize, Serialize};
+
+use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig};
-use crate::config::BibiConfig;
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub(crate) enum CitekeyCase {
+ #[serde(alias = "uppercase", alias = "upper")]
+ Upper,
+ #[serde(alias = "lowercase", alias = "lower")]
+ Lower,
+}
#[derive(Debug, Default, Clone)]
pub(crate) struct CitekeyFormatting {
bib_entries: Bibliography,
fields: Vec<String>,
+ case: Option<CitekeyCase>,
}
impl CitekeyFormatting {
@@ -27,6 +54,7 @@ impl CitekeyFormatting {
Ok(Self {
bib_entries,
fields,
+ case: cfg.citekey_formatter.case.clone(),
})
}
@@ -38,17 +66,36 @@ impl CitekeyFormatting {
split_formatting_pat(pattern);
let formatted_field_str = {
let mut formatted_str = String::new();
- let field = entry.get_as::<String>(field).expect(&format!(
- "Couldn't find field {}",
- field.bold().bright_red()
- ));
+ let field = preformat_field(field, entry);
+ // let field = if let Ok(val) = entry.get_as::<String>(field) {
+ // val
+ // } else {
+ // eprintln!(
+ // "Unable to get field {} for entry {}",
+ // field.bright_red(),
+ // &entry.key.bold()
+ // );
+ // continue;
+ // };
+ // let field = entry.get_as::<String>(field).expect(&format!(
+ // "Couldn't find field {}",
+ // field.bold().bright_red()
+ // ));
let mut split_field = field.split_whitespace();
let mut words_passed = 0;
+ let word_count = if let Some(val) = word_count {
+ val
+ } else {
+ field.split_whitespace().count()
+ // split_field.size_hint().0 + 1
+ };
+ dbg!(word_count);
loop {
if let Some(field_slice) = split_field.next() {
formatted_str = formatted_str + format_word(field_slice, char_count);
words_passed += 1;
- if word_count.is_some_and(|count| count == words_passed) {
+ // if word_count.is_some_and(|count| count == words_passed) {
+ if word_count == words_passed {
formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
break;
} else {
@@ -68,7 +115,51 @@ impl CitekeyFormatting {
}
}
-fn preformat_field() {}
+/// Preformat fields that are commonly used in citekeys
+fn preformat_field(field: &str, entry: &mut Entry) -> String {
+ match field {
+ "title" => {
+ sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("NA".into()))
+ }
+ "author" => {
+ if let Ok(authors) = entry.author() {
+ let mut last_names = String::new();
+ for a in authors.iter() {
+ last_names = last_names + &a.name + " ";
+ }
+ dbg!(&last_names);
+ last_names
+ } else {
+ "NA".to_string()
+ }
+ }
+ "year" => {
+ if let Ok(date) = entry.date() {
+ date.to_chunks().format_verbatim()[..4].to_string()
+ } else {
+ entry.get_as::<String>(field).unwrap_or("NA".into())
+ }
+ }
+ "subtitle" => {
+ sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("NA".into()))
+ }
+ "editor" => {
+ if let Ok(editors) = entry.editors() {
+ let mut last_names = String::new();
+ for editortypes in editors.iter() {
+ for e in editortypes.0.iter() {
+ last_names = last_names + &e.name + " ";
+ }
+ }
+ last_names
+ } else {
+ "NA".to_string()
+ }
+ }
+ "pubtype" | "entrytype" => entry.entry_type.to_string(),
+ _ => entry.get_as::<String>(field).unwrap_or("Empty".into()),
+ }
+}
/// Cut of word at char count index if its set
fn format_word(word: &str, count: Option<usize>) -> &str {
@@ -122,7 +213,7 @@ mod tests {
use biblatex::Bibliography;
use itertools::Itertools;
- use crate::bibiman::citekeys::{CitekeyFormatting, split_formatting_pat};
+ use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting, split_formatting_pat};
#[test]
fn split_citekey_pattern() {
@@ -149,19 +240,36 @@ mod tests {
#[test]
fn format_citekey_test() {
- let src = r"@book{tolkien1937, author = {Tolkien}, title = {\enquote{Lord} of the \textbf{Rings}}, year = {1937}}";
+ let src = r"
+ @book{bhambra_colonialism_social_theory_2021,
+ title = {Colonialism and \textbf{Modern Social Theory}},
+ author = {Bhambra, Gurminder K. and Holmwood, John},
+ location = {Cambridge and Medford},
+ publisher = {Polity Press},
+ date = {2021},
+ }
+ ";
let bibliography = Bibliography::parse(src).unwrap();
let mut formatting_struct = CitekeyFormatting {
bib_entries: bibliography,
fields: vec![
- "author;1;;-;_".into(),
- "title;3;3;_;_".into(),
+ "entrytype;;;;:".into(),
+ "author;;;-;_".into(),
+ "title;4;3;_;_".into(),
+ "location;;4;:;_".into(),
"year".into(),
],
+ case: None,
};
formatting_struct.do_formatting();
let keys = formatting_struct.bib_entries.keys().collect_vec();
- assert_eq!(keys[0], "Tolkien_Lor_of_the_1937");
- assert_eq!(keys[0].to_lowercase(), "tolkien_lor_of_the_1937");
+ assert_eq!(
+ keys[0],
+ "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021"
+ );
+ assert_eq!(
+ keys[0].to_lowercase(),
+ "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021"
+ );
}
}
diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs
index 9ccf4c4..8c1cc43 100644
--- a/src/bibiman/sanitize.rs
+++ b/src/bibiman/sanitize.rs
@@ -26,12 +26,12 @@ use optimized_sanitize::optimized_sanitize;
macro_rules! optimized_sanitize_bibidata {
($bibidata:expr) => {
SanitizedBibiData {
- title: optimized_sanitize(&$bibidata.title),
+ title: optimized_sanitize(false, &$bibidata.title),
subtitle: match &$bibidata.subtitle {
None => None,
- Some(subtitle) => Some(optimized_sanitize(subtitle)),
+ Some(subtitle) => Some(optimized_sanitize(false, subtitle)),
},
- abstract_text: optimized_sanitize(&$bibidata.abstract_text),
+ abstract_text: optimized_sanitize(false, &$bibidata.abstract_text),
}
};
}
@@ -41,3 +41,7 @@ macro_rules! optimized_sanitize_bibidata {
pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData {
optimized_sanitize_bibidata!(bibidata)
}
+
+pub fn sanitize_single_string_fully(input: &str) -> String {
+ optimized_sanitize(true, input)
+}
diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
index 336cc56..dff4d32 100644
--- a/src/bibiman/sanitize/optimized_sanitize.rs
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -31,6 +31,17 @@ static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = ph
r"\textsc" => ("", Some("")),
};
+static LOOKUP_CLEAR_ALL: phf::Map<&'static str, (&'static str, Option<&'static str>)> = phf_map! {
+ r"\mkbibquote" => ("", Some("")),
+ r"\enquote*" => ("", Some("")),
+ r"\enquote" => ("", Some("")),
+ r"\hyphen" => ("", None),
+ r"\textbf" => ("", Some("")),
+ r"\textit" => ("", Some("")),
+ r"\texttt" => ("", Some("")),
+ r"\textsc" => ("", Some("")),
+};
+
#[derive(Logos, Debug)]
enum Token {
#[token("{")]
@@ -43,7 +54,12 @@ enum Token {
ForcedSpace,
}
-pub fn optimized_sanitize(input_text: &str) -> String {
+pub fn optimized_sanitize(clear_all: bool, input_text: &str) -> String {
+ let lookup = if clear_all {
+ &LOOKUP_CLEAR_ALL
+ } else {
+ &LOOKUP
+ };
let mut char_counter: usize = 0;
let mut contains_macro: bool = false;
for char in input_text.chars() {
@@ -87,7 +103,7 @@ pub fn optimized_sanitize(input_text: &str) -> String {
}
Token::LaTeXMacro => {
let texmacro = lex.slice();
- if let Some(x) = LOOKUP.get(&texmacro.trim_end()) {
+ if let Some(x) = lookup.get(&texmacro.trim_end()) {
if let Some(end) = x.1 {
bc_up = true;
counter_actions.insert(bracket_counter + 1, end);
@@ -115,11 +131,17 @@ mod tests {
#[test]
fn check_sanitization() {
let result = optimized_sanitize(
+ false,
r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}",
);
assert_eq!(
"\"Intention\" und \"Intentionen \"sind\" - bibquote-.\"",
result
- )
+ );
+ let result = optimized_sanitize(
+ true,
+ r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}",
+ );
+ assert_eq!("Intention und Intentionen sind bibquote.", result)
}
}
diff --git a/src/config.rs b/src/config.rs
index 78cfef9..8a333e4 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -16,21 +16,24 @@
/////
use std::{
- fs::{create_dir_all, File},
- io::{stdin, Write},
+ fs::{File, create_dir_all},
+ io::{Write, stdin},
path::PathBuf,
str::FromStr,
};
use color_eyre::{eyre::Result, owo_colors::OwoColorize};
use figment::{
- providers::{Format, Serialized, Toml},
Figment,
+ providers::{Format, Serialized, Toml},
};
use ratatui::style::Color;
use serde::{Deserialize, Serialize};
-use crate::{bibiman::bibisetup::CustomField, cliargs::CLIArgs};
+use crate::{
+ bibiman::{bibisetup::CustomField, citekeys::CitekeyCase},
+ cliargs::CLIArgs,
+};
const DEFAULT_CONFIG: &str = r##"
# [general]
@@ -147,6 +150,7 @@ pub struct Colors {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct CitekeyFormatter {
pub fields: Vec<String>,
+ pub case: Option<CitekeyCase>,
}
impl Default for BibiConfig {
@@ -167,7 +171,10 @@ impl Default for BibiConfig {
custom_column: CustomField::Pubtype,
},
colors: Self::dark_colors(),
- citekey_formatter: CitekeyFormatter { fields: Vec::new() },
+ citekey_formatter: CitekeyFormatter {
+ fields: Vec::new(),
+ case: None,
+ },
}
}
}
@@ -194,7 +201,10 @@ impl BibiConfig {
} else {
Self::dark_colors()
},
- citekey_formatter: CitekeyFormatter { fields: Vec::new() },
+ citekey_formatter: CitekeyFormatter {
+ fields: Vec::new(),
+ case: None,
+ },
}
}
@@ -352,8 +362,8 @@ fn select_opener() -> String {
#[cfg(test)]
mod tests {
use figment::{
- providers::{Format, Toml},
Figment,
+ providers::{Format, Toml},
};
use super::BibiConfig;