diff options
| author | lukeflo | 2025-10-08 13:49:06 +0200 |
|---|---|---|
| committer | lukeflo | 2025-10-08 13:49:06 +0200 |
| commit | a07359a9a1da0c06c040f77158be31b3883b33ac (patch) | |
| tree | 78317d775f5d62d7e48663bdb75794aae17b5ea4 | |
| parent | 67afd67d4d51a00079269d431a7058fc50750886 (diff) | |
| download | bibiman-a07359a9a1da0c06c040f77158be31b3883b33ac.tar.gz bibiman-a07359a9a1da0c06c040f77158be31b3883b33ac.zip | |
refine matching and preformatting of fields for citekey formatting; add case field and enum
| -rw-r--r-- | Cross.toml | 6 | ||||
| -rw-r--r-- | src/bibiman/citekeys.rs | 136 | ||||
| -rw-r--r-- | src/bibiman/sanitize.rs | 10 | ||||
| -rw-r--r-- | src/bibiman/sanitize/optimized_sanitize.rs | 28 | ||||
| -rw-r--r-- | src/config.rs | 24 |
5 files changed, 177 insertions, 27 deletions
@@ -9,3 +9,9 @@ pre-build = [ "dpkg --add-architecture $CROSS_DEB_ARCH", "apt-get update && apt-get install --assume-yes libssl-dev:$CROSS_DEB_ARCH", ] + +[target.x86_64-unknown-freebsd] +# pre-build = [ +# "dpkg --add-architecture $CROSS_DEB_ARCH", +# "apt-get update && apt-get install --assume-yes libssl-dev:$CROSS_DEB_ARCH", +# ] diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 4c36e80..a304e92 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -1,13 +1,40 @@ -use biblatex::Bibliography; +// bibiman - a TUI for managing BibLaTeX databases +// Copyright (C) 2025 lukeflo +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. 
+///// + +use biblatex::{Bibliography, ChunksExt, Entry, Type}; use color_eyre::eyre::eyre; use owo_colors::OwoColorize; +use serde::{Deserialize, Serialize}; + +use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig}; -use crate::config::BibiConfig; +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub(crate) enum CitekeyCase { + #[serde(alias = "uppercase", alias = "upper")] + Upper, + #[serde(alias = "lowercase", alias = "lower")] + Lower, +} #[derive(Debug, Default, Clone)] pub(crate) struct CitekeyFormatting { bib_entries: Bibliography, fields: Vec<String>, + case: Option<CitekeyCase>, } impl CitekeyFormatting { @@ -27,6 +54,7 @@ impl CitekeyFormatting { Ok(Self { bib_entries, fields, + case: cfg.citekey_formatter.case.clone(), }) } @@ -38,17 +66,36 @@ impl CitekeyFormatting { split_formatting_pat(pattern); let formatted_field_str = { let mut formatted_str = String::new(); - let field = entry.get_as::<String>(field).expect(&format!( - "Couldn't find field {}", - field.bold().bright_red() - )); + let field = preformat_field(field, entry); + // let field = if let Ok(val) = entry.get_as::<String>(field) { + // val + // } else { + // eprintln!( + // "Unable to get field {} for entry {}", + // field.bright_red(), + // &entry.key.bold() + // ); + // continue; + // }; + // let field = entry.get_as::<String>(field).expect(&format!( + // "Couldn't find field {}", + // field.bold().bright_red() + // )); let mut split_field = field.split_whitespace(); let mut words_passed = 0; + let word_count = if let Some(val) = word_count { + val + } else { + field.split_whitespace().count() + // split_field.size_hint().0 + 1 + }; + dbg!(word_count); loop { if let Some(field_slice) = split_field.next() { formatted_str = formatted_str + format_word(field_slice, char_count); words_passed += 1; - if word_count.is_some_and(|count| count == words_passed) { + // if word_count.is_some_and(|count| count == words_passed) { + if word_count == words_passed { 
formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); break; } else { @@ -68,7 +115,51 @@ impl CitekeyFormatting { } } -fn preformat_field() {} +/// Preformat some fields which are very common to be used in citekeys +fn preformat_field(field: &str, entry: &mut Entry) -> String { + match field { + "title" => { + sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("NA".into())) + } + "author" => { + if let Ok(authors) = entry.author() { + let mut last_names = String::new(); + for a in authors.iter() { + last_names = last_names + &a.name + " "; + } + dbg!(&last_names); + last_names + } else { + "NA".to_string() + } + } + "year" => { + if let Ok(date) = entry.date() { + date.to_chunks().format_verbatim()[..4].to_string() + } else { + entry.get_as::<String>(field).unwrap_or("NA".into()) + } + } + "subtitle" => { + sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("NA".into())) + } + "editor" => { + if let Ok(editors) = entry.editors() { + let mut last_names = String::new(); + for editortypes in editors.iter() { + for e in editortypes.0.iter() { + last_names = last_names + &e.name + " "; + } + } + last_names + } else { + "NA".to_string() + } + } + "pubtype" | "entrytype" => entry.entry_type.to_string(), + _ => entry.get_as::<String>(field).unwrap_or("Empty".into()), + } +} /// Cut of word at char count index if its set fn format_word(word: &str, count: Option<usize>) -> &str { @@ -122,7 +213,7 @@ mod tests { use biblatex::Bibliography; use itertools::Itertools; - use crate::bibiman::citekeys::{CitekeyFormatting, split_formatting_pat}; + use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting, split_formatting_pat}; #[test] fn split_citekey_pattern() { @@ -149,19 +240,36 @@ mod tests { #[test] fn format_citekey_test() { - let src = r"@book{tolkien1937, author = {Tolkien}, title = {\enquote{Lord} of the \textbf{Rings}}, year = {1937}}"; + let src = r" + @book{bhambra_colonialism_social_theory_2021, + title = 
{Colonialism and \textbf{Modern Social Theory}}, + author = {Bhambra, Gurminder K. and Holmwood, John}, + location = {Cambridge and Medford}, + publisher = {Polity Press}, + date = {2021}, + } + "; let bibliography = Bibliography::parse(src).unwrap(); let mut formatting_struct = CitekeyFormatting { bib_entries: bibliography, fields: vec![ - "author;1;;-;_".into(), - "title;3;3;_;_".into(), + "entrytype;;;;:".into(), + "author;;;-;_".into(), + "title;4;3;_;_".into(), + "location;;4;:;_".into(), "year".into(), ], + case: None, }; formatting_struct.do_formatting(); let keys = formatting_struct.bib_entries.keys().collect_vec(); - assert_eq!(keys[0], "Tolkien_Lor_of_the_1937"); - assert_eq!(keys[0].to_lowercase(), "tolkien_lor_of_the_1937"); + assert_eq!( + keys[0], + "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021" + ); + assert_eq!( + keys[0].to_lowercase(), + "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021" + ); } } diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs index 9ccf4c4..8c1cc43 100644 --- a/src/bibiman/sanitize.rs +++ b/src/bibiman/sanitize.rs @@ -26,12 +26,12 @@ use optimized_sanitize::optimized_sanitize; macro_rules! optimized_sanitize_bibidata { ($bibidata:expr) => { SanitizedBibiData { - title: optimized_sanitize(&$bibidata.title), + title: optimized_sanitize(false, &$bibidata.title), subtitle: match &$bibidata.subtitle { None => None, - Some(subtitle) => Some(optimized_sanitize(subtitle)), + Some(subtitle) => Some(optimized_sanitize(false, subtitle)), }, - abstract_text: optimized_sanitize(&$bibidata.abstract_text), + abstract_text: optimized_sanitize(false, &$bibidata.abstract_text), } }; } @@ -41,3 +41,7 @@ macro_rules! 
optimized_sanitize_bibidata { pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData { optimized_sanitize_bibidata!(bibidata) } + +pub fn sanitize_single_string_fully(input: &str) -> String { + optimized_sanitize(true, input) +} diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs index 336cc56..dff4d32 100644 --- a/src/bibiman/sanitize/optimized_sanitize.rs +++ b/src/bibiman/sanitize/optimized_sanitize.rs @@ -31,6 +31,17 @@ static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = ph r"\textsc" => ("", Some("")), }; +static LOOKUP_CLEAR_ALL: phf::Map<&'static str, (&'static str, Option<&'static str>)> = phf_map! { + r"\mkbibquote" => ("", Some("")), + r"\enquote*" => ("", Some("")), + r"\enquote" => ("", Some("")), + r"\hyphen" => ("", None), + r"\textbf" => ("", Some("")), + r"\textit" => ("", Some("")), + r"\texttt" => ("", Some("")), + r"\textsc" => ("", Some("")), +}; + #[derive(Logos, Debug)] enum Token { #[token("{")] @@ -43,7 +54,12 @@ enum Token { ForcedSpace, } -pub fn optimized_sanitize(input_text: &str) -> String { +pub fn optimized_sanitize(clear_all: bool, input_text: &str) -> String { + let lookup = if clear_all { + &LOOKUP_CLEAR_ALL + } else { + &LOOKUP + }; let mut char_counter: usize = 0; let mut contains_macro: bool = false; for char in input_text.chars() { @@ -87,7 +103,7 @@ pub fn optimized_sanitize(input_text: &str) -> String { } Token::LaTeXMacro => { let texmacro = lex.slice(); - if let Some(x) = LOOKUP.get(&texmacro.trim_end()) { + if let Some(x) = lookup.get(&texmacro.trim_end()) { if let Some(end) = x.1 { bc_up = true; counter_actions.insert(bracket_counter + 1, end); @@ -115,11 +131,17 @@ mod tests { #[test] fn check_sanitization() { let result = optimized_sanitize( + false, r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}", ); assert_eq!( "\"Intention\" und \"Intentionen \"sind\" - bibquote-.\"", result 
- ) + ); + let result = optimized_sanitize( + true, + r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}", + ); + assert_eq!("Intention und Intentionen sind bibquote.", result) } } diff --git a/src/config.rs b/src/config.rs index 78cfef9..8a333e4 100644 --- a/src/config.rs +++ b/src/config.rs @@ -16,21 +16,24 @@ ///// use std::{ - fs::{create_dir_all, File}, - io::{stdin, Write}, + fs::{File, create_dir_all}, + io::{Write, stdin}, path::PathBuf, str::FromStr, }; use color_eyre::{eyre::Result, owo_colors::OwoColorize}; use figment::{ - providers::{Format, Serialized, Toml}, Figment, + providers::{Format, Serialized, Toml}, }; use ratatui::style::Color; use serde::{Deserialize, Serialize}; -use crate::{bibiman::bibisetup::CustomField, cliargs::CLIArgs}; +use crate::{ + bibiman::{bibisetup::CustomField, citekeys::CitekeyCase}, + cliargs::CLIArgs, +}; const DEFAULT_CONFIG: &str = r##" # [general] @@ -147,6 +150,7 @@ pub struct Colors { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct CitekeyFormatter { pub fields: Vec<String>, + pub case: Option<CitekeyCase>, } impl Default for BibiConfig { @@ -167,7 +171,10 @@ impl Default for BibiConfig { custom_column: CustomField::Pubtype, }, colors: Self::dark_colors(), - citekey_formatter: CitekeyFormatter { fields: Vec::new() }, + citekey_formatter: CitekeyFormatter { + fields: Vec::new(), + case: None, + }, } } } @@ -194,7 +201,10 @@ impl BibiConfig { } else { Self::dark_colors() }, - citekey_formatter: CitekeyFormatter { fields: Vec::new() }, + citekey_formatter: CitekeyFormatter { + fields: Vec::new(), + case: None, + }, } } @@ -352,8 +362,8 @@ fn select_opener() -> String { #[cfg(test)] mod tests { use figment::{ - providers::{Format, Toml}, Figment, + providers::{Format, Toml}, }; use super::BibiConfig; |
