From 67afd67d4d51a00079269d431a7058fc50750886 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Tue, 7 Oct 2025 15:05:47 +0200 Subject: implement basic citekey formatting: * Reads patterns and parses them. TODO: * **Fully** sanitize Latex macros * Preprocess complex and regularly used fields like `author` * Write changes to original bib file --- Cargo.toml | 2 +- src/bibiman.rs | 4 +- src/bibiman/bibisetup.rs | 10 ++- src/bibiman/citekeys.rs | 167 +++++++++++++++++++++++++++++++++++++++++++++++ src/config.rs | 8 +++ 5 files changed, 187 insertions(+), 4 deletions(-) create mode 100644 src/bibiman/citekeys.rs diff --git a/Cargo.toml b/Cargo.toml index 66627b8..098848e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ readme = "README.md" description = "TUI for interacting with BibLaTeX databases" keywords = ["tui", "biblatex", "bibliography", "bibtex", "latex"] categories = ["command-line-utilities"] -edition = "2021" +edition = "2024" exclude = ["/tests", ".*"] [profile.release-git] diff --git a/src/bibiman.rs b/src/bibiman.rs index c423ce1..3158d73 100644 --- a/src/bibiman.rs +++ b/src/bibiman.rs @@ -40,6 +40,7 @@ use std::result::Result::Ok; use tui_input::Input; pub mod bibisetup; +pub mod citekeys; pub mod entries; pub mod keywords; pub mod search; @@ -88,13 +89,14 @@ pub struct Bibiman { } impl Bibiman { - // Constructs a new instance of [`App`]. + /// Constructs a new instance of [`Bibiman`]. 
pub fn new(args: &mut CLIArgs, cfg: &mut BibiConfig) -> Result { let mut main_bibfiles: Vec = args.pos_args.clone(); if cfg.general.bibfiles.is_some() { main_bibfiles.append(cfg.general.bibfiles.as_mut().unwrap()) }; let main_bibfiles = cliargs::parse_files(main_bibfiles); + // TODO: insert workflow for formatting citekeys let main_biblio = BibiSetup::new(&main_bibfiles, cfg); let tag_list = TagList::new(main_biblio.keyword_list.clone()); let search_struct = BibiSearch::default(); diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs index b3f788c..a83a507 100644 --- a/src/bibiman/bibisetup.rs +++ b/src/bibiman/bibisetup.rs @@ -246,8 +246,14 @@ impl BibiData { } impl BibiSetup { + /// Setup the TUI: + /// * Getting files + /// * Parse files into `biblatex::Bibliography` struct + /// * If wanted, format citekeys + /// * Get citekey vector + /// * Collect all keywords + /// * Build the entry list to be displayed pub fn new(main_bibfiles: &[PathBuf], cfg: &BibiConfig) -> Self { - // TODO: Needs check for config file path as soon as config file is impl Self::check_files(main_bibfiles); let bibfilestring = Self::bibfiles_to_string(main_bibfiles); let bibliography = biblatex::Bibliography::parse(&bibfilestring).unwrap(); @@ -264,7 +270,7 @@ impl BibiSetup { } } - // Check which file format the passed file has + /// Check which file format the passed file has fn check_files(main_bibfiles: &[PathBuf]) { if main_bibfiles.is_empty() { println!( diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs new file mode 100644 index 0000000..4c36e80 --- /dev/null +++ b/src/bibiman/citekeys.rs @@ -0,0 +1,167 @@ +use biblatex::Bibliography; +use color_eyre::eyre::eyre; +use owo_colors::OwoColorize; + +use crate::config::BibiConfig; + +#[derive(Debug, Default, Clone)] +pub(crate) struct CitekeyFormatting { + bib_entries: Bibliography, + fields: Vec, +} + +impl CitekeyFormatting { + /// Start Citekey formatting with building a new instance of `CitekeyFormatting` + /// 
Formatting is processed file by file, because `bibman` can handle + /// multi-file setups. + /// The `Bibliography` inserted will be edited in place with the new citekeys. + /// Thus, in the end the `bib_entries` field will hold the updated `Bibliography` + pub fn new(cfg: &BibiConfig, bib_entries: Bibliography) -> color_eyre::Result { + let fields = cfg.citekey_formatter.fields.clone(); + if fields.is_empty() { + return Err(eyre!( + "To format all citekeys, you need to provide {} values in the config file", + "fields".bold() + )); + } + Ok(Self { + bib_entries, + fields, + }) + } + + pub fn do_formatting(&mut self) { + for entry in self.bib_entries.iter_mut() { + let mut new_citekey = String::new(); + for pattern in self.fields.iter() { + let (field, word_count, char_count, inner_delimiter, trailing_delimiter) = + split_formatting_pat(pattern); + let formatted_field_str = { + let mut formatted_str = String::new(); + let field = entry.get_as::(field).expect(&format!( + "Couldn't find field {}", + field.bold().bright_red() + )); + let mut split_field = field.split_whitespace(); + let mut words_passed = 0; + loop { + if let Some(field_slice) = split_field.next() { + formatted_str = formatted_str + format_word(field_slice, char_count); + words_passed += 1; + if word_count.is_some_and(|count| count == words_passed) { + formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); + break; + } else { + formatted_str = formatted_str + inner_delimiter.unwrap_or("") + } + } else { + formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); + break; + }; + } + formatted_str + }; + new_citekey = new_citekey + &formatted_field_str; + } + entry.key = new_citekey; + } + } +} + +fn preformat_field() {} + +/// Cut of word at char count index if its set +fn format_word(word: &str, count: Option) -> &str { + if let Some(len) = count + && len < word.chars().count() + { + &word[..len] + } else { + word + } +} + +/// Split a formatting pattern of kind +/// `;;;;`, +/// 
e.g.: `title;3;3;_;:` will give `("title", 3, 3, "_", ":")` +fn split_formatting_pat( + pattern: &str, +) -> ( + &str, + Option, + Option, + Option<&str>, + Option<&str>, +) { + let mut splits = pattern.split(';'); + ( + splits + .next() + .expect("Need field value for formatting citekey"), + if let Some(next) = splits.next() + && next.len() > 0 + { + next.parse::().ok() + } else { + None + }, + if let Some(next) = splits.next() + && next.len() > 0 + { + next.parse::().ok() + } else { + None + }, + splits.next(), + splits.next(), + ) +} + +#[cfg(test)] +mod tests { + use biblatex::Bibliography; + use itertools::Itertools; + + use crate::bibiman::citekeys::{CitekeyFormatting, split_formatting_pat}; + + #[test] + fn split_citekey_pattern() { + let pattern = "title;3;5;_;_"; + + assert_eq!( + split_formatting_pat(pattern), + ("title", Some(3), Some(5), Some("_"), Some("_")) + ); + + let pattern = "year"; + + assert_eq!( + split_formatting_pat(pattern), + ("year", None, None, None, None) + ); + + let pattern = "author;1;;;_"; + assert_eq!( + split_formatting_pat(pattern), + ("author", Some(1), None, Some(""), Some("_")) + ); + } + + #[test] + fn format_citekey_test() { + let src = r"@book{tolkien1937, author = {Tolkien}, title = {\enquote{Lord} of the \textbf{Rings}}, year = {1937}}"; + let bibliography = Bibliography::parse(src).unwrap(); + let mut formatting_struct = CitekeyFormatting { + bib_entries: bibliography, + fields: vec![ + "author;1;;-;_".into(), + "title;3;3;_;_".into(), + "year".into(), + ], + }; + formatting_struct.do_formatting(); + let keys = formatting_struct.bib_entries.keys().collect_vec(); + assert_eq!(keys[0], "Tolkien_Lor_of_the_1937"); + assert_eq!(keys[0].to_lowercase(), "tolkien_lor_of_the_1937"); + } +} diff --git a/src/config.rs b/src/config.rs index 00a35b7..78cfef9 100644 --- a/src/config.rs +++ b/src/config.rs @@ -102,6 +102,7 @@ const DEFAULT_CONFIG: &str = r##" pub struct BibiConfig { pub general: General, pub colors: Colors, + pub 
citekey_formatter: CitekeyFormatter, } /// Substruct [general] in config.toml @@ -143,6 +144,11 @@ pub struct Colors { pub year_color: Color, } +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct CitekeyFormatter { + pub fields: Vec, +} + impl Default for BibiConfig { fn default() -> Self { Self { @@ -161,6 +167,7 @@ impl Default for BibiConfig { custom_column: CustomField::Pubtype, }, colors: Self::dark_colors(), + citekey_formatter: CitekeyFormatter { fields: Vec::new() }, } } } @@ -187,6 +194,7 @@ impl BibiConfig { } else { Self::dark_colors() }, + citekey_formatter: CitekeyFormatter { fields: Vec::new() }, } } -- cgit v1.2.3 From a07359a9a1da0c06c040f77158be31b3883b33ac Mon Sep 17 00:00:00 2001 From: lukeflo Date: Wed, 8 Oct 2025 13:49:06 +0200 Subject: refine matching and preformatting of fields for citekey formatting; add case field and enum --- Cross.toml | 6 ++ src/bibiman/citekeys.rs | 136 ++++++++++++++++++++++++++--- src/bibiman/sanitize.rs | 10 ++- src/bibiman/sanitize/optimized_sanitize.rs | 28 +++++- src/config.rs | 24 +++-- 5 files changed, 177 insertions(+), 27 deletions(-) diff --git a/Cross.toml b/Cross.toml index e7cd27b..6140bf2 100644 --- a/Cross.toml +++ b/Cross.toml @@ -9,3 +9,9 @@ pre-build = [ "dpkg --add-architecture $CROSS_DEB_ARCH", "apt-get update && apt-get install --assume-yes libssl-dev:$CROSS_DEB_ARCH", ] + +[target.x86_64-unknown-freebsd] +# pre-build = [ +# "dpkg --add-architecture $CROSS_DEB_ARCH", +# "apt-get update && apt-get install --assume-yes libssl-dev:$CROSS_DEB_ARCH", +# ] diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 4c36e80..a304e92 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -1,13 +1,40 @@ -use biblatex::Bibliography; +// bibiman - a TUI for managing BibLaTeX databases +// Copyright (C) 2025 lukeflo +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// 
the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +///// + +use biblatex::{Bibliography, ChunksExt, Entry, Type}; use color_eyre::eyre::eyre; use owo_colors::OwoColorize; +use serde::{Deserialize, Serialize}; + +use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig}; -use crate::config::BibiConfig; +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub(crate) enum CitekeyCase { + #[serde(alias = "uppercase", alias = "upper")] + Upper, + #[serde(alias = "lowercase", alias = "lower")] + Lower, +} #[derive(Debug, Default, Clone)] pub(crate) struct CitekeyFormatting { bib_entries: Bibliography, fields: Vec, + case: Option, } impl CitekeyFormatting { @@ -27,6 +54,7 @@ impl CitekeyFormatting { Ok(Self { bib_entries, fields, + case: cfg.citekey_formatter.case.clone(), }) } @@ -38,17 +66,36 @@ impl CitekeyFormatting { split_formatting_pat(pattern); let formatted_field_str = { let mut formatted_str = String::new(); - let field = entry.get_as::(field).expect(&format!( - "Couldn't find field {}", - field.bold().bright_red() - )); + let field = preformat_field(field, entry); + // let field = if let Ok(val) = entry.get_as::(field) { + // val + // } else { + // eprintln!( + // "Unable to get field {} for entry {}", + // field.bright_red(), + // &entry.key.bold() + // ); + // continue; + // }; + // let field = entry.get_as::(field).expect(&format!( + // "Couldn't find field {}", + // field.bold().bright_red() + // )); let mut split_field = field.split_whitespace(); let mut words_passed = 0; + let word_count = if let Some(val) 
= word_count { + val + } else { + field.split_whitespace().count() + // split_field.size_hint().0 + 1 + }; + dbg!(word_count); loop { if let Some(field_slice) = split_field.next() { formatted_str = formatted_str + format_word(field_slice, char_count); words_passed += 1; - if word_count.is_some_and(|count| count == words_passed) { + // if word_count.is_some_and(|count| count == words_passed) { + if word_count == words_passed { formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); break; } else { @@ -68,7 +115,51 @@ impl CitekeyFormatting { } } -fn preformat_field() {} +/// Preformat some fields which are very common to be used in citekeys +fn preformat_field(field: &str, entry: &mut Entry) -> String { + match field { + "title" => { + sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("NA".into())) + } + "author" => { + if let Ok(authors) = entry.author() { + let mut last_names = String::new(); + for a in authors.iter() { + last_names = last_names + &a.name + " "; + } + dbg!(&last_names); + last_names + } else { + "NA".to_string() + } + } + "year" => { + if let Ok(date) = entry.date() { + date.to_chunks().format_verbatim()[..4].to_string() + } else { + entry.get_as::(field).unwrap_or("NA".into()) + } + } + "subtitle" => { + sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("NA".into())) + } + "editor" => { + if let Ok(editors) = entry.editors() { + let mut last_names = String::new(); + for editortypes in editors.iter() { + for e in editortypes.0.iter() { + last_names = last_names + &e.name + " "; + } + } + last_names + } else { + "NA".to_string() + } + } + "pubtype" | "entrytype" => entry.entry_type.to_string(), + _ => entry.get_as::(field).unwrap_or("Empty".into()), + } +} /// Cut of word at char count index if its set fn format_word(word: &str, count: Option) -> &str { @@ -122,7 +213,7 @@ mod tests { use biblatex::Bibliography; use itertools::Itertools; - use crate::bibiman::citekeys::{CitekeyFormatting, split_formatting_pat}; + 
use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting, split_formatting_pat}; #[test] fn split_citekey_pattern() { @@ -149,19 +240,36 @@ mod tests { #[test] fn format_citekey_test() { - let src = r"@book{tolkien1937, author = {Tolkien}, title = {\enquote{Lord} of the \textbf{Rings}}, year = {1937}}"; + let src = r" + @book{bhambra_colonialism_social_theory_2021, + title = {Colonialism and \textbf{Modern Social Theory}}, + author = {Bhambra, Gurminder K. and Holmwood, John}, + location = {Cambridge and Medford}, + publisher = {Polity Press}, + date = {2021}, + } + "; let bibliography = Bibliography::parse(src).unwrap(); let mut formatting_struct = CitekeyFormatting { bib_entries: bibliography, fields: vec![ - "author;1;;-;_".into(), - "title;3;3;_;_".into(), + "entrytype;;;;:".into(), + "author;;;-;_".into(), + "title;4;3;_;_".into(), + "location;;4;:;_".into(), "year".into(), ], + case: None, }; formatting_struct.do_formatting(); let keys = formatting_struct.bib_entries.keys().collect_vec(); - assert_eq!(keys[0], "Tolkien_Lor_of_the_1937"); - assert_eq!(keys[0].to_lowercase(), "tolkien_lor_of_the_1937"); + assert_eq!( + keys[0], + "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021" + ); + assert_eq!( + keys[0].to_lowercase(), + "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021" + ); } } diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs index 9ccf4c4..8c1cc43 100644 --- a/src/bibiman/sanitize.rs +++ b/src/bibiman/sanitize.rs @@ -26,12 +26,12 @@ use optimized_sanitize::optimized_sanitize; macro_rules! 
optimized_sanitize_bibidata { ($bibidata:expr) => { SanitizedBibiData { - title: optimized_sanitize(&$bibidata.title), + title: optimized_sanitize(false, &$bibidata.title), subtitle: match &$bibidata.subtitle { None => None, - Some(subtitle) => Some(optimized_sanitize(subtitle)), + Some(subtitle) => Some(optimized_sanitize(false, subtitle)), }, - abstract_text: optimized_sanitize(&$bibidata.abstract_text), + abstract_text: optimized_sanitize(false, &$bibidata.abstract_text), } }; } @@ -41,3 +41,7 @@ macro_rules! optimized_sanitize_bibidata { pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData { optimized_sanitize_bibidata!(bibidata) } + +pub fn sanitize_single_string_fully(input: &str) -> String { + optimized_sanitize(true, input) +} diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs index 336cc56..dff4d32 100644 --- a/src/bibiman/sanitize/optimized_sanitize.rs +++ b/src/bibiman/sanitize/optimized_sanitize.rs @@ -31,6 +31,17 @@ static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = ph r"\textsc" => ("", Some("")), }; +static LOOKUP_CLEAR_ALL: phf::Map<&'static str, (&'static str, Option<&'static str>)> = phf_map! 
{ + r"\mkbibquote" => ("", Some("")), + r"\enquote*" => ("", Some("")), + r"\enquote" => ("", Some("")), + r"\hyphen" => ("", None), + r"\textbf" => ("", Some("")), + r"\textit" => ("", Some("")), + r"\texttt" => ("", Some("")), + r"\textsc" => ("", Some("")), +}; + #[derive(Logos, Debug)] enum Token { #[token("{")] @@ -43,7 +54,12 @@ enum Token { ForcedSpace, } -pub fn optimized_sanitize(input_text: &str) -> String { +pub fn optimized_sanitize(clear_all: bool, input_text: &str) -> String { + let lookup = if clear_all { + &LOOKUP_CLEAR_ALL + } else { + &LOOKUP + }; let mut char_counter: usize = 0; let mut contains_macro: bool = false; for char in input_text.chars() { @@ -87,7 +103,7 @@ pub fn optimized_sanitize(input_text: &str) -> String { } Token::LaTeXMacro => { let texmacro = lex.slice(); - if let Some(x) = LOOKUP.get(&texmacro.trim_end()) { + if let Some(x) = lookup.get(&texmacro.trim_end()) { if let Some(end) = x.1 { bc_up = true; counter_actions.insert(bracket_counter + 1, end); @@ -115,11 +131,17 @@ mod tests { #[test] fn check_sanitization() { let result = optimized_sanitize( + false, r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}", ); assert_eq!( "\"Intention\" und \"Intentionen \"sind\" - bibquote-.\"", result - ) + ); + let result = optimized_sanitize( + true, + r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}", + ); + assert_eq!("Intention und Intentionen sind bibquote.", result) } } diff --git a/src/config.rs b/src/config.rs index 78cfef9..8a333e4 100644 --- a/src/config.rs +++ b/src/config.rs @@ -16,21 +16,24 @@ ///// use std::{ - fs::{create_dir_all, File}, - io::{stdin, Write}, + fs::{File, create_dir_all}, + io::{Write, stdin}, path::PathBuf, str::FromStr, }; use color_eyre::{eyre::Result, owo_colors::OwoColorize}; use figment::{ - providers::{Format, Serialized, Toml}, Figment, + providers::{Format, Serialized, Toml}, }; use 
ratatui::style::Color; use serde::{Deserialize, Serialize}; -use crate::{bibiman::bibisetup::CustomField, cliargs::CLIArgs}; +use crate::{ + bibiman::{bibisetup::CustomField, citekeys::CitekeyCase}, + cliargs::CLIArgs, +}; const DEFAULT_CONFIG: &str = r##" # [general] @@ -147,6 +150,7 @@ pub struct Colors { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct CitekeyFormatter { pub fields: Vec, + pub case: Option, } impl Default for BibiConfig { @@ -167,7 +171,10 @@ impl Default for BibiConfig { custom_column: CustomField::Pubtype, }, colors: Self::dark_colors(), - citekey_formatter: CitekeyFormatter { fields: Vec::new() }, + citekey_formatter: CitekeyFormatter { + fields: Vec::new(), + case: None, + }, } } } @@ -194,7 +201,10 @@ impl BibiConfig { } else { Self::dark_colors() }, - citekey_formatter: CitekeyFormatter { fields: Vec::new() }, + citekey_formatter: CitekeyFormatter { + fields: Vec::new(), + case: None, + }, } } @@ -352,8 +362,8 @@ fn select_opener() -> String { #[cfg(test)] mod tests { use figment::{ - providers::{Format, Toml}, Figment, + providers::{Format, Toml}, }; use super::BibiConfig; -- cgit v1.2.3 From 8b858f92da69cfb8fa43ec861cda46eeb6ef4bbe Mon Sep 17 00:00:00 2001 From: lukeflo Date: Wed, 8 Oct 2025 14:39:46 +0200 Subject: case parsing from config, needs to be implemented for citekey struct --- src/bibiman/citekeys.rs | 95 +++++++++++++++++++++++-------------------------- src/config.rs | 8 +++-- 2 files changed, 49 insertions(+), 54 deletions(-) diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index a304e92..118ae3e 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -23,7 +23,7 @@ use serde::{Deserialize, Serialize}; use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig}; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub(crate) enum CitekeyCase { +pub enum CitekeyCase { #[serde(alias = "uppercase", alias = "upper")] Upper, #[serde(alias = "lowercase", 
alias = "lower")] @@ -44,7 +44,11 @@ impl CitekeyFormatting { /// The `Bibliography` inserted will be edited in place with the new citekeys. /// Thus, in the end the `bib_entries` field will hold the updated `Bibliography` pub fn new(cfg: &BibiConfig, bib_entries: Bibliography) -> color_eyre::Result { - let fields = cfg.citekey_formatter.fields.clone(); + let fields = cfg + .citekey_formatter + .fields + .clone() + .expect("Need to define fields in config to format citekeys"); if fields.is_empty() { return Err(eyre!( "To format all citekeys, you need to provide {} values in the config file", @@ -58,65 +62,54 @@ impl CitekeyFormatting { }) } + /// Process the actual formatting. The citekey of every entry will be updated. pub fn do_formatting(&mut self) { for entry in self.bib_entries.iter_mut() { - let mut new_citekey = String::new(); - for pattern in self.fields.iter() { - let (field, word_count, char_count, inner_delimiter, trailing_delimiter) = - split_formatting_pat(pattern); - let formatted_field_str = { - let mut formatted_str = String::new(); - let field = preformat_field(field, entry); - // let field = if let Ok(val) = entry.get_as::(field) { - // val - // } else { - // eprintln!( - // "Unable to get field {} for entry {}", - // field.bright_red(), - // &entry.key.bold() - // ); - // continue; - // }; - // let field = entry.get_as::(field).expect(&format!( - // "Couldn't find field {}", - // field.bold().bright_red() - // )); - let mut split_field = field.split_whitespace(); - let mut words_passed = 0; - let word_count = if let Some(val) = word_count { - val + entry.key = build_citekey(entry, &self.fields); + } + } +} + +/// Build the citekey from the patterns defined in the config file +fn build_citekey(entry: &Entry, pattern_fields: &[String]) -> String { + let mut new_citekey = String::new(); + for pattern in pattern_fields.iter() { + let (field, word_count, char_count, inner_delimiter, trailing_delimiter) = + split_formatting_pat(pattern); + let 
formatted_field_str = { + let mut formatted_str = String::new(); + let field = preformat_field(field, entry); + let mut split_field = field.split_whitespace(); + let mut words_passed = 0; + let word_count = if let Some(val) = word_count { + val + } else { + field.split_whitespace().count() + }; + loop { + if let Some(field_slice) = split_field.next() { + formatted_str = formatted_str + format_word(field_slice, char_count); + words_passed += 1; + if word_count == words_passed { + formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); + break; } else { - field.split_whitespace().count() - // split_field.size_hint().0 + 1 - }; - dbg!(word_count); - loop { - if let Some(field_slice) = split_field.next() { - formatted_str = formatted_str + format_word(field_slice, char_count); - words_passed += 1; - // if word_count.is_some_and(|count| count == words_passed) { - if word_count == words_passed { - formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); - break; - } else { - formatted_str = formatted_str + inner_delimiter.unwrap_or("") - } - } else { - formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); - break; - }; + formatted_str = formatted_str + inner_delimiter.unwrap_or("") } - formatted_str + } else { + formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); + break; }; - new_citekey = new_citekey + &formatted_field_str; } - entry.key = new_citekey; - } + formatted_str + }; + new_citekey = new_citekey + &formatted_field_str; } + new_citekey } /// Preformat some fields which are very common to be used in citekeys -fn preformat_field(field: &str, entry: &mut Entry) -> String { +fn preformat_field(field: &str, entry: &Entry) -> String { match field { "title" => { sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("NA".into())) diff --git a/src/config.rs b/src/config.rs index 8a333e4..a5df61c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -149,7 +149,7 @@ pub struct Colors { #[derive(Debug, Clone, Serialize, 
Deserialize, PartialEq)] pub struct CitekeyFormatter { - pub fields: Vec, + pub fields: Option>, pub case: Option, } @@ -172,7 +172,7 @@ impl Default for BibiConfig { }, colors: Self::dark_colors(), citekey_formatter: CitekeyFormatter { - fields: Vec::new(), + fields: None, case: None, }, } @@ -202,7 +202,7 @@ impl BibiConfig { Self::dark_colors() }, citekey_formatter: CitekeyFormatter { - fields: Vec::new(), + fields: None, case: None, }, } @@ -400,6 +400,8 @@ mod tests { author_color = "38" title_color = "37" year_color = "135" + + [citekey_formatter] "#, )?; -- cgit v1.2.3 From 952dc94b412ffcff26a59c37f3112079c78058ff Mon Sep 17 00:00:00 2001 From: lukeflo Date: Wed, 8 Oct 2025 22:30:46 +0200 Subject: use vector for old new key pairs --- src/bibiman/citekeys.rs | 64 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 118ae3e..b389da2 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -15,6 +15,12 @@ // along with this program. If not, see . ///// +use std::{ + fs::File, + io::Write, + path::{Path, PathBuf}, +}; + use biblatex::{Bibliography, ChunksExt, Entry, Type}; use color_eyre::eyre::eyre; use owo_colors::OwoColorize; @@ -32,9 +38,11 @@ pub enum CitekeyCase { #[derive(Debug, Default, Clone)] pub(crate) struct CitekeyFormatting { + bibfile_path: PathBuf, bib_entries: Bibliography, fields: Vec, case: Option, + old_new_keys_map: Vec<(String, String)>, } impl CitekeyFormatting { @@ -43,7 +51,11 @@ impl CitekeyFormatting { /// multi-file setups. /// The `Bibliography` inserted will be edited in place with the new citekeys. 
/// Thus, in the end the `bib_entries` field will hold the updated `Bibliography` - pub fn new(cfg: &BibiConfig, bib_entries: Bibliography) -> color_eyre::Result { + pub fn new>( + cfg: &BibiConfig, + path: P, + bib_entries: Bibliography, + ) -> color_eyre::Result { let fields = cfg .citekey_formatter .fields @@ -56,22 +68,39 @@ impl CitekeyFormatting { )); } Ok(Self { + bibfile_path: path.as_ref().to_path_buf(), bib_entries, fields, case: cfg.citekey_formatter.case.clone(), + old_new_keys_map: Vec::new(), }) } /// Process the actual formatting. The citekey of every entry will be updated. pub fn do_formatting(&mut self) { + let mut old_new_keys: Vec<(String, String)> = Vec::new(); for entry in self.bib_entries.iter_mut() { - entry.key = build_citekey(entry, &self.fields); + old_new_keys.push(( + entry.key.clone(), + build_citekey(entry, &self.fields, self.case.as_ref()), + )); } + + self.old_new_keys_map = old_new_keys; + } + + /// Write entries with updated citekeys to bibfile + pub fn update_file(&self) -> color_eyre::Result<()> { + let mut file = File::open(&self.bibfile_path)?; + + file.write_all(self.bib_entries.to_biblatex_string().as_bytes())?; + + Ok(()) } } /// Build the citekey from the patterns defined in the config file -fn build_citekey(entry: &Entry, pattern_fields: &[String]) -> String { +fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String { let mut new_citekey = String::new(); for pattern in pattern_fields.iter() { let (field, word_count, char_count, inner_delimiter, trailing_delimiter) = @@ -105,7 +134,14 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String]) -> String { }; new_citekey = new_citekey + &formatted_field_str; } - new_citekey + if let Some(case_format) = case { + match case_format { + CitekeyCase::Lower => new_citekey.to_lowercase(), + CitekeyCase::Upper => new_citekey.to_uppercase(), + } + } else { + new_citekey + } } /// Preformat some fields which are very common to be used in 
citekeys @@ -203,6 +239,8 @@ fn split_formatting_pat( #[cfg(test)] mod tests { + use std::path::PathBuf; + use biblatex::Bibliography; use itertools::Itertools; @@ -244,6 +282,7 @@ mod tests { "; let bibliography = Bibliography::parse(src).unwrap(); let mut formatting_struct = CitekeyFormatting { + bibfile_path: PathBuf::new(), bib_entries: bibliography, fields: vec![ "entrytype;;;;:".into(), @@ -253,6 +292,7 @@ mod tests { "year".into(), ], case: None, + old_new_keys_map: Vec::new(), }; formatting_struct.do_formatting(); let keys = formatting_struct.bib_entries.keys().collect_vec(); @@ -260,9 +300,23 @@ mod tests { keys[0], "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021" ); + formatting_struct.case = Some(CitekeyCase::Lower); + formatting_struct.do_formatting(); + let keys = formatting_struct.bib_entries.keys().collect_vec(); assert_eq!( - keys[0].to_lowercase(), + keys[0], "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021" ); + // let bib_string = formatting_struct.bib_entries.to_biblatex_string(); + // let new_entry = r" + // @book{book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021, + // title = {Colonialism and \textbf{Modern Social Theory}}, + // author = {Bhambra, Gurminder K. 
and Holmwood, John}, + // location = {Cambridge and Medford}, + // publisher = {Polity Press}, + // date = {2021}, + // } + // "; + // assert_eq!(new_entry, bib_string); } } -- cgit v1.2.3 From 7266a14753ed5d572aeed584b66b07d1b9921ca7 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Thu, 9 Oct 2025 14:28:55 +0200 Subject: rewrite cli parsing; need to implement format-citekeys cli parsing --- src/bibiman/citekeys.rs | 105 ++++++++++++++++++++++++++++++++++++------------ src/cliargs.rs | 52 +++++++++++++++++++----- src/main.rs | 30 +------------- 3 files changed, 124 insertions(+), 63 deletions(-) diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index b389da2..b7995ac 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -16,7 +16,7 @@ ///// use std::{ - fs::File, + fs::OpenOptions, io::Write, path::{Path, PathBuf}, }; @@ -38,7 +38,9 @@ pub enum CitekeyCase { #[derive(Debug, Default, Clone)] pub(crate) struct CitekeyFormatting { - bibfile_path: PathBuf, + /// bibfile to replace keys at. The optional fields defines a differing + /// output file to write to, otherwise original file will be overwritten. + bibfile_path: (PathBuf, Option), bib_entries: Bibliography, fields: Vec, case: Option, @@ -54,6 +56,7 @@ impl CitekeyFormatting { pub fn new>( cfg: &BibiConfig, path: P, + target: Option

, bib_entries: Bibliography, ) -> color_eyre::Result { let fields = cfg @@ -68,7 +71,10 @@ impl CitekeyFormatting { )); } Ok(Self { - bibfile_path: path.as_ref().to_path_buf(), + bibfile_path: ( + path.as_ref().to_path_buf(), + target.map(|p| p.as_ref().to_path_buf()), + ), bib_entries, fields, case: cfg.citekey_formatter.case.clone(), @@ -77,9 +83,9 @@ impl CitekeyFormatting { } /// Process the actual formatting. The citekey of every entry will be updated. - pub fn do_formatting(&mut self) { + pub fn do_formatting(&mut self) -> &mut Self { let mut old_new_keys: Vec<(String, String)> = Vec::new(); - for entry in self.bib_entries.iter_mut() { + for entry in self.bib_entries.iter() { old_new_keys.push(( entry.key.clone(), build_citekey(entry, &self.fields, self.case.as_ref()), @@ -87,16 +93,47 @@ impl CitekeyFormatting { } self.old_new_keys_map = old_new_keys; + + self } /// Write entries with updated citekeys to bibfile pub fn update_file(&self) -> color_eyre::Result<()> { - let mut file = File::open(&self.bibfile_path)?; + let source_file = self.bibfile_path.0.as_path(); + let target_file = if let Some(path) = &self.bibfile_path.1 { + path + } else { + source_file + }; + let mut content = std::fs::read_to_string(source_file)?; - file.write_all(self.bib_entries.to_biblatex_string().as_bytes())?; + for (old_key, new_key) in self.old_new_keys_map.iter() { + content = content.replace(old_key, new_key); + } + + let mut new_file = OpenOptions::new() + .truncate(true) + .write(true) + .create(true) + .open(target_file)?; + + new_file.write_all(content.as_bytes())?; Ok(()) } + + /// Sort the vector containing old/new citekey pairs by the length of the latter. + /// That will prevent the replacement longer key parts that equal a full shorter + /// key. + /// + /// You are **very encouraged** to call this method before `update_file()` to + /// prevent replacing citekeys partly which afterwards wont match the pattern + /// anymore. 
+ pub fn rev_sort_new_keys_by_len(&mut self) -> &mut Self { + self.old_new_keys_map + .sort_by(|a, b| b.1.len().cmp(&a.1.len())); + self + } } /// Build the citekey from the patterns defined in the config file @@ -272,6 +309,18 @@ mod tests { #[test] fn format_citekey_test() { let src = r" + @article{bos_latex_metadata_and_publishing_workflows_2023, + title = {{LaTeX}, metadata, and publishing workflows}, + author = {Bos, Joppe W. and {McCurley}, Kevin S.}, + year = {2023}, + month = apr, + journal = {arXiv}, + number = {{arXiv}:2301.08277}, + doi = {10.48550/arXiv.2301.08277}, + url = {http://arxiv.org/abs/2301.08277}, + urldate = {2023-08-22}, + note = {type: article}, + } @book{bhambra_colonialism_social_theory_2021, title = {Colonialism and \textbf{Modern Social Theory}}, author = {Bhambra, Gurminder K. and Holmwood, John}, @@ -282,7 +331,7 @@ mod tests { "; let bibliography = Bibliography::parse(src).unwrap(); let mut formatting_struct = CitekeyFormatting { - bibfile_path: PathBuf::new(), + bibfile_path: (PathBuf::new(), None), bib_entries: bibliography, fields: vec![ "entrytype;;;;:".into(), @@ -294,29 +343,35 @@ mod tests { case: None, old_new_keys_map: Vec::new(), }; - formatting_struct.do_formatting(); - let keys = formatting_struct.bib_entries.keys().collect_vec(); + let _ = formatting_struct.do_formatting(); + assert_eq!( + formatting_struct.old_new_keys_map.get(0).unwrap().1, + "article:Bos-McCurley_LaT_met_and_pub_Empt_2023" + ); assert_eq!( - keys[0], + formatting_struct.old_new_keys_map.get(1).unwrap().1, "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021" ); formatting_struct.case = Some(CitekeyCase::Lower); - formatting_struct.do_formatting(); - let keys = formatting_struct.bib_entries.keys().collect_vec(); + let _ = formatting_struct.do_formatting().rev_sort_new_keys_by_len(); + // now the longer citekey is processed first and its in lowercase! 
assert_eq!( - keys[0], + formatting_struct.old_new_keys_map.get(0).unwrap().1, "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021" ); - // let bib_string = formatting_struct.bib_entries.to_biblatex_string(); - // let new_entry = r" - // @book{book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021, - // title = {Colonialism and \textbf{Modern Social Theory}}, - // author = {Bhambra, Gurminder K. and Holmwood, John}, - // location = {Cambridge and Medford}, - // publisher = {Polity Press}, - // date = {2021}, - // } - // "; - // assert_eq!(new_entry, bib_string); + } + + #[test] + fn sorting_appended_citekeys() { + let mut keys: Vec<(String, String)> = vec![ + ("smith2000".into(), "smith_book_2000".into()), + ("smith2000a".into(), "smith_book_2000a".into()), + ("smith2000ab".into(), "smith_book_2000ab".into()), + ]; + keys.sort_by(|a, b| b.1.len().cmp(&a.1.len())); + let mut keys = keys.iter(); + assert_eq!(keys.next().unwrap().1, "smith_book_2000ab"); + assert_eq!(keys.next().unwrap().1, "smith_book_2000a"); + assert_eq!(keys.next().unwrap().1, "smith_book_2000"); } } diff --git a/src/cliargs.rs b/src/cliargs.rs index 082ecda..3b12fc3 100644 --- a/src/cliargs.rs +++ b/src/cliargs.rs @@ -18,20 +18,19 @@ use color_eyre::eyre::Result; use dirs::{config_dir, home_dir}; use lexopt::prelude::*; +use owo_colors::OwoColorize; use owo_colors::colors::css::LightGreen; use owo_colors::colors::*; -use owo_colors::OwoColorize; use std::env; use std::path::PathBuf; use walkdir::WalkDir; use crate::app; +use crate::config::BibiConfig; // struct for CLIArgs #[derive(Debug, Default, Clone)] pub struct CLIArgs { - pub helparg: bool, - pub versionarg: bool, pub pos_args: Vec, pub cfg_path: Option, pub light_theme: bool, @@ -39,7 +38,7 @@ pub struct CLIArgs { } impl CLIArgs { - pub fn parse_args() -> Result { + pub fn parse_args() -> color_eyre::Result<(CLIArgs, BibiConfig)> { let mut args = CLIArgs::default(); let mut parser = lexopt::Parser::from_env(); @@ -52,22 +51,57 @@ 
impl CLIArgs { None }; + // if parser + // .raw_args() + // .is_ok_and(|mut arg| arg.next_if(|a| a == "format-citekeys").is_some()) + // { + // todo!("Format citekeys options"); + // } + while let Some(arg) = parser.next()? { match arg { - Short('h') | Long("help") => args.helparg = true, - Short('v') | Long("version") => args.versionarg = true, + Short('h') | Long("help") => { + println!("{}", help_func()); + std::process::exit(0); + } + Short('v') | Long("version") => { + println!("{}", version_func()); + std::process::exit(0); + } Short('c') | Long("config-file") => args.cfg_path = Some(parser.value()?.parse()?), Long("light-terminal") => args.light_theme = true, Long("pdf-path") => { args.pdf_path = Some(parser.value()?.parse()?); } // Value(pos_arg) => parse_files(&mut args, pos_arg), - Value(pos_arg) => args.pos_args.push(pos_arg.into()), - _ => return Err(arg.unexpected()), + Value(pos_arg) => { + if args.pos_args.is_empty() && pos_arg == "format-citekeys" { + todo!("Write format citekeys function"); + } else { + args.pos_args.push(parser.value()?.into()); + } + } + _ => return Err(arg.unexpected().into()), } } - Ok(args) + if args + .cfg_path + .as_ref() + .is_some_and(|f| !f.try_exists().unwrap() || !f.is_file()) + { + BibiConfig::create_default_config(&args); + } + + let mut cfg = if args.cfg_path.is_some() { + BibiConfig::parse_config(&args)? 
+ } else { + BibiConfig::new(&args) + }; + + cfg.cli_overwrite(&args); + + Ok((args, cfg)) } } diff --git a/src/main.rs b/src/main.rs index c956d7c..58805d5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -31,35 +31,7 @@ pub mod tui; #[tokio::main] async fn main() -> Result<()> { // Parse CLI arguments - let mut parsed_args = CLIArgs::parse_args()?; - - // Print help if -h/--help flag is passed and exit - if parsed_args.helparg { - println!("{}", cliargs::help_func()); - std::process::exit(0); - } - - // Print version if -v/--version flag is passed and exit - if parsed_args.versionarg { - println!("{}", cliargs::version_func()); - std::process::exit(0); - } - - if parsed_args - .cfg_path - .as_ref() - .is_some_and(|f| !f.try_exists().unwrap() || !f.is_file()) - { - BibiConfig::create_default_config(&parsed_args); - } - - let mut cfg = if parsed_args.cfg_path.is_some() { - BibiConfig::parse_config(&parsed_args)? - } else { - BibiConfig::new(&parsed_args) - }; - - cfg.cli_overwrite(&parsed_args); + let (mut parsed_args, mut cfg) = CLIArgs::parse_args()?; init_error_hooks()?; -- cgit v1.2.3 From 669936a8e4ff99012e8b32ae15616f8fe206ab2d Mon Sep 17 00:00:00 2001 From: lukeflo Date: Thu, 9 Oct 2025 16:48:54 +0200 Subject: subcommand test for pure cli operations --- src/cliargs.rs | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/cliargs.rs b/src/cliargs.rs index 3b12fc3..26a07af 100644 --- a/src/cliargs.rs +++ b/src/cliargs.rs @@ -15,7 +15,6 @@ // along with this program. If not, see . 
///// -use color_eyre::eyre::Result; use dirs::{config_dir, home_dir}; use lexopt::prelude::*; use owo_colors::OwoColorize; @@ -41,6 +40,7 @@ impl CLIArgs { pub fn parse_args() -> color_eyre::Result<(CLIArgs, BibiConfig)> { let mut args = CLIArgs::default(); let mut parser = lexopt::Parser::from_env(); + let mut subcommand = None; // Default config args.cfg_path = if config_dir().is_some() { @@ -73,12 +73,22 @@ impl CLIArgs { Long("pdf-path") => { args.pdf_path = Some(parser.value()?.parse()?); } - // Value(pos_arg) => parse_files(&mut args, pos_arg), Value(pos_arg) => { - if args.pos_args.is_empty() && pos_arg == "format-citekeys" { - todo!("Write format citekeys function"); + if args.pos_args.is_empty() { + let value = pos_arg + .into_string() + .unwrap_or_else(|os| os.to_string_lossy().to_string()); + match value.as_str() { + "format-citekeys" => { + subcommand = Some(value); + break; + } + _ => { + args.pos_args.push(value.into()); + } + } } else { - args.pos_args.push(parser.value()?.into()); + args.pos_args.push(pos_arg.into()); } } _ => return Err(arg.unexpected().into()), @@ -88,7 +98,7 @@ impl CLIArgs { if args .cfg_path .as_ref() - .is_some_and(|f| !f.try_exists().unwrap() || !f.is_file()) + .is_some_and(|f| f.try_exists().is_err() || !f.is_file()) { BibiConfig::create_default_config(&args); } @@ -99,6 +109,13 @@ impl CLIArgs { BibiConfig::new(&args) }; + if let Some(cmd) = subcommand { + match cmd.as_str() { + "format-citekeys" => todo!("write citekey formatting"), + _ => {} + } + } + cfg.cli_overwrite(&args); Ok((args, cfg)) -- cgit v1.2.3 From f9548af5c7693edf536b4ad45564a964338e2c2e Mon Sep 17 00:00:00 2001 From: lukeflo Date: Fri, 10 Oct 2025 10:31:45 +0200 Subject: set up citekey formatting cli, reformat help output --- Cargo.lock | 1 + Cargo.toml | 1 + src/bibiman/citekeys.rs | 87 ++++++++++++++++++++++++++++++++++++++- src/cliargs.rs | 106 ++++++++++++++++++++++++++++++++---------------- src/main.rs | 14 ++++--- 5 files changed, 168 insertions(+), 
41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 49f65b4..a27636e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -107,6 +107,7 @@ dependencies = [ "editor-command", "figment", "futures", + "indoc", "itertools", "lexopt", "logos", diff --git a/Cargo.toml b/Cargo.toml index 098848e..abf1eee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,7 @@ figment = { version = "0.10.19", features = [ "toml", "test" ]} owo-colors = "4.2.2" logos = "0.15.1" phf = { version = "0.13.1", features = ["macros"] } +indoc = "2.0.6" [workspace.metadata.cross.target.aarch64-unknown-linux-gnu] # Install libssl-dev:arm64, see diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index b7995ac..cafd124 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -23,7 +23,12 @@ use std::{ use biblatex::{Bibliography, ChunksExt, Entry, Type}; use color_eyre::eyre::eyre; -use owo_colors::OwoColorize; +use indoc::formatdoc; +use lexopt::Arg::{Long, Short}; +use owo_colors::{ + OwoColorize, + colors::{BrightBlue, Green, White}, +}; use serde::{Deserialize, Serialize}; use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig}; @@ -48,6 +53,30 @@ pub(crate) struct CitekeyFormatting { } impl CitekeyFormatting { + pub(crate) fn parse_citekey_cli( + parser: &mut lexopt::Parser, + cfg: &BibiConfig, + ) -> color_eyre::Result<()> { + let mut formatter = CitekeyFormatting::default(); + + while let Some(arg) = parser.next()? 
{ + match arg { + Short('h') | Long("help") => { + formatting_help(); + return Ok(()); + } + Short('s') | Short('f') | Long("source") | Long("file") => { + formatter.bibfile_path.0 = parser.value()?.into() + } + Short('t') | Short('o') | Long("target") | Long("output") => { + formatter.bibfile_path.1 = Some(parser.value()?.into()) + } + _ => return Err(arg.unexpected().into()), + } + } + + Ok(()) + } /// Start Citekey formatting with building a new instance of `CitekeyFormatting` /// Formatting is processed file by file, because `bibman` can handle /// multi-file setups. @@ -136,6 +165,62 @@ impl CitekeyFormatting { } } +fn formatting_help() { + let help = vec![ + formatdoc!( + "{} {}\n", + env!("CARGO_PKG_NAME").fg::().bold(), + env!("CARGO_PKG_VERSION") + ), + formatdoc!("{}", "USAGE".bold()), + formatdoc!( + "\t{} {} {} {}\n", + env!("CARGO_PKG_NAME").fg::().bold(), + "format-citekeys".bold(), + "--source=".bold(), + "--output=".bold() + ), + formatdoc!( + " + \tThis help describes the CLI usage for the citekey formatting + \tfunctionality of bibiman. The definition of patterns how the + \tcitekeys should be formatted must be set in the config file. + \tFor further informations how to use this patterns etc. see: + \t{} + ", + "https://codeberg.org/lukeflo/bibiman/src/branch/main#bibiman" + .italic() + .fg::() + ), + formatdoc!("{}", "OPTIONS".bold()), + formatdoc!( + " + \t{} + \tShow this help and exit + ", + "-h, --help".fg::().bold() + ), + formatdoc! {" + \t{} + \tThe bibfile for which the citekey formatting should be processed. + \tTakes a path as argument. + ", "-s, -f, --source=, --file=".fg::().bold()}, + formatdoc!( + " + \t{} + \tThe bibfile to which the updated content should be written. + \tTakes a path as argument. If the file doesn't exist, it will be + \tcreated. + \tIf the argument isn't used, the original file will be {}! 
+ ", + "-t, -o, --target=, --output=".fg::().bold(), + "overwritten".italic(), + ), + ]; + let help = help.join("\n"); + println!("{}", help); +} + /// Build the citekey from the patterns defined in the config file fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String { let mut new_citekey = String::new(); diff --git a/src/cliargs.rs b/src/cliargs.rs index 26a07af..e766e77 100644 --- a/src/cliargs.rs +++ b/src/cliargs.rs @@ -16,6 +16,7 @@ ///// use dirs::{config_dir, home_dir}; +use indoc::formatdoc; use lexopt::prelude::*; use owo_colors::OwoColorize; use owo_colors::colors::css::LightGreen; @@ -25,6 +26,7 @@ use std::path::PathBuf; use walkdir::WalkDir; use crate::app; +use crate::bibiman::citekeys::CitekeyFormatting; use crate::config::BibiConfig; // struct for CLIArgs @@ -37,10 +39,16 @@ pub struct CLIArgs { } impl CLIArgs { - pub fn parse_args() -> color_eyre::Result<(CLIArgs, BibiConfig)> { + /// This struct parses the command line and initializes and returns the + /// necessary structs `CLIArgs` and `BibiConfig`. + /// + /// Additionally, it returns a bool which defines if the TUI should be run + /// or not. The latter is the case for pure CLI processes as `format-citekeys`. 
+ pub fn parse_args() -> color_eyre::Result<(CLIArgs, BibiConfig, bool)> { let mut args = CLIArgs::default(); let mut parser = lexopt::Parser::from_env(); let mut subcommand = None; + let mut run_tui = true; // Default config args.cfg_path = if config_dir().is_some() { @@ -81,6 +89,7 @@ impl CLIArgs { match value.as_str() { "format-citekeys" => { subcommand = Some(value); + run_tui = false; break; } _ => { @@ -111,14 +120,16 @@ impl CLIArgs { if let Some(cmd) = subcommand { match cmd.as_str() { - "format-citekeys" => todo!("write citekey formatting"), + "format-citekeys" => { + CitekeyFormatting::parse_citekey_cli(&mut parser, &cfg)?; + } _ => {} } } cfg.cli_overwrite(&args); - Ok((args, cfg)) + Ok((args, cfg, run_tui)) } } @@ -172,14 +183,21 @@ pub fn help_func() -> String { env!("CARGO_PKG_VERSION").fg::(), ), format!( - "{}:\n\t{} [Flags] [files/dirs]\n", + "{}\n\t{} [OPTIONS] [SUBCOMMAND | POSITIONAL ARGUMENTS]\n", "USAGE".bold(), - "bibiman".bold() + env!("CARGO_PKG_NAME").fg::().bold() + ), + formatdoc!( + " + \tYou can either use a {} or {}, not both! + ", + "subcommand".bold(), + "positional arguments".bold() ), format!( - "{}:\n\t{}\t\tPath to {} file", + "{}\n\t{}\t\tPath to {} file", "POSITIONAL ARGUMENTS".bold(), - "".fg::().bold(), + "".fg::().bold(), ".bib".fg::().bold() ), format!( @@ -188,38 +206,58 @@ pub fn help_func() -> String { ".bib".fg::().bold() ), format!("\n\t{}", "Both can be passed multiple times".italic()), - format!("\n{}:", "FLAGS".bold()), - format!("\t{}", "-h, --help".bold().fg::()), - format!("\t\t{}", "Show this help and exit"), - format!("\t{}", "-v, --version".bold().fg::()), - format!("\t\t{}", "Show the version and exit"), - format!("\t{}", "--light-terminal".bold().fg::()), - format!( - "\t\t{}", - "Enable default colors for light terminal background" + format!("\n{}", "SUBCOMMANDS".bold()), + formatdoc!( + " + \t{} + \tRun the citekey formatting procedure on a specified bibfile. 
+ \tFor further infos run {} + ", + "format-citekeys".fg::().bold(), + "bibiman format-citekeys --help".fg::().bold() ), - format!( - "\t{}{}", - "-c, --config-file=".bold().fg::(), - "".bold().italic().fg::() + format!("{}", "OPTIONS".bold()), + formatdoc!( + " + \t{} + \tShow this help and exit + ", + "-h, --help".bold().fg::() ), - format!("\t\t{}", "Path to config file used for current session."), - format!("\t\t{}", "Takes precedence over standard config file."), - format!( - "\t{}{}", - "--pdf-path=".bold().fg::(), - "".bold().italic().fg::() + formatdoc!( + " + \t{} + \tShow the version and exit + ", + "-v, --version".bold().fg::() ), - format!("\t\t{}", "Path to directory containing PDF files."), - format!( - "\t\t{}", - "If the pdf files basename matches an entrys citekey," + formatdoc!( + " + \t{} + \tEnable default colors for light terminal background + ", + "--light-terminal".bold().fg::() ), - format!( - "\t\t{}", - "its attached as connected PDF file for the current session." + formatdoc!( + " + \t{}{} + \tPath to config file used for current session. + \tTakes precedence over standard config file. + ", + "-c, --config-file=".bold().fg::(), + "".bold().italic().fg::() + ), + formatdoc!( + " + \t{}{} + \tPath to directory containing PDF files. + \tIf the pdf files basename matches an entrys citekey, + \tits attached as connected PDF file for the current session. + \tDoes not edit the bibfile itself! 
+ ", + "--pdf-path=".bold().fg::(), + "".bold().italic().fg::() ), - format!("\t\t{}", "Does not edit the bibfile itself!"), ]; let help = help.join("\n"); help diff --git a/src/main.rs b/src/main.rs index 58805d5..e735eb0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,7 +18,6 @@ use app::App; use cliargs::CLIArgs; use color_eyre::eyre::Result; -use config::BibiConfig; use errorsetup::init_error_hooks; pub mod app; @@ -31,13 +30,16 @@ pub mod tui; #[tokio::main] async fn main() -> Result<()> { // Parse CLI arguments - let (mut parsed_args, mut cfg) = CLIArgs::parse_args()?; + let (mut parsed_args, mut cfg, run_tui) = CLIArgs::parse_args()?; - init_error_hooks()?; + if run_tui { + init_error_hooks()?; - // Create an application. - let mut app = App::new(&mut parsed_args, &mut cfg)?; + // Create an application. + let mut app = App::new(&mut parsed_args, &mut cfg)?; + + app.run(&cfg).await?; + } - app.run(&cfg).await?; Ok(()) } -- cgit v1.2.3 From 4779dbc5fe3712bce31bbb5f1f43c28c4c839420 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Fri, 10 Oct 2025 13:47:07 +0200 Subject: substitute byte index for char counting loop, impl `dry-run` option for citekey formatting --- src/bibiman/citekeys.rs | 118 +++++++++++++++++++++++++++++++++++------------- tests/test-config.toml | 4 ++ 2 files changed, 90 insertions(+), 32 deletions(-) diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index cafd124..5121741 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -22,7 +22,7 @@ use std::{ }; use biblatex::{Bibliography, ChunksExt, Entry, Type}; -use color_eyre::eyre::eyre; +use color_eyre::eyre::{OptionExt, eyre}; use indoc::formatdoc; use lexopt::Arg::{Long, Short}; use owo_colors::{ @@ -50,6 +50,7 @@ pub(crate) struct CitekeyFormatting { fields: Vec, case: Option, old_new_keys_map: Vec<(String, String)>, + dry_run: bool, } impl CitekeyFormatting { @@ -59,12 +60,27 @@ impl CitekeyFormatting { ) -> color_eyre::Result<()> { let mut formatter = 
CitekeyFormatting::default(); + formatter.fields = cfg + .citekey_formatter + .fields + .clone() + .ok_or_eyre("Need to define fields correctly in config file")?; + + formatter.case = cfg.citekey_formatter.case.clone(); + + if formatter.fields.is_empty() { + return Err(eyre!( + "To format all citekeys, you need to provide {} values in the config file", + "fields".bold() + )); + } while let Some(arg) = parser.next()? { match arg { Short('h') | Long("help") => { formatting_help(); return Ok(()); } + Short('d') | Long("dry-run") => formatter.dry_run = true, Short('s') | Short('f') | Long("source") | Long("file") => { formatter.bibfile_path.0 = parser.value()?.into() } @@ -75,6 +91,16 @@ impl CitekeyFormatting { } } + let bibstring = std::fs::read_to_string(&formatter.bibfile_path.0)?; + + formatter.bib_entries = Bibliography::parse(&bibstring) + .map_err(|e| eyre!("Couldn't parse bibfile due to {}", e.kind))?; + + formatter + .do_formatting() + .rev_sort_new_keys_by_len() + .update_file()?; + Ok(()) } /// Start Citekey formatting with building a new instance of `CitekeyFormatting` @@ -108,6 +134,7 @@ impl CitekeyFormatting { fields, case: cfg.citekey_formatter.case.clone(), old_new_keys_map: Vec::new(), + dry_run: false, }) } @@ -122,32 +149,40 @@ impl CitekeyFormatting { } self.old_new_keys_map = old_new_keys; + // dbg!(&self.old_new_keys_map); self } /// Write entries with updated citekeys to bibfile - pub fn update_file(&self) -> color_eyre::Result<()> { - let source_file = self.bibfile_path.0.as_path(); - let target_file = if let Some(path) = &self.bibfile_path.1 { - path + pub fn update_file(&mut self) -> color_eyre::Result<()> { + if self.dry_run { + println!("Following citekeys would be formatted: old => new\n"); + self.old_new_keys_map.sort_by(|a, b| a.0.cmp(&b.0)); + for (old, new) in &self.old_new_keys_map { + println!("{} => {}", old.italic(), new.bold()) + } } else { - source_file - }; - let mut content = std::fs::read_to_string(source_file)?; - - for 
(old_key, new_key) in self.old_new_keys_map.iter() { - content = content.replace(old_key, new_key); - } + let source_file = self.bibfile_path.0.as_path(); + let target_file = if let Some(path) = &self.bibfile_path.1 { + path + } else { + source_file + }; + let mut content = std::fs::read_to_string(source_file)?; - let mut new_file = OpenOptions::new() - .truncate(true) - .write(true) - .create(true) - .open(target_file)?; + for (old_key, new_key) in self.old_new_keys_map.iter() { + content = content.replace(old_key, new_key); + } - new_file.write_all(content.as_bytes())?; + let mut new_file = OpenOptions::new() + .truncate(true) + .write(true) + .create(true) + .open(target_file)?; + new_file.write_all(content.as_bytes())?; + } Ok(()) } @@ -160,7 +195,7 @@ impl CitekeyFormatting { /// anymore. pub fn rev_sort_new_keys_by_len(&mut self) -> &mut Self { self.old_new_keys_map - .sort_by(|a, b| b.1.len().cmp(&a.1.len())); + .sort_by(|a, b| b.0.len().cmp(&a.0.len())); self } } @@ -232,14 +267,20 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey let field = preformat_field(field, entry); let mut split_field = field.split_whitespace(); let mut words_passed = 0; - let word_count = if let Some(val) = word_count { + let field_count = field.split_whitespace().count(); + let word_count = if let Some(val) = word_count + && val <= field_count + { val } else { - field.split_whitespace().count() + field_count }; loop { + if field.is_empty() { + break; + } if let Some(field_slice) = split_field.next() { - formatted_str = formatted_str + format_word(field_slice, char_count); + formatted_str = formatted_str + &format_word(field_slice, char_count); words_passed += 1; if word_count == words_passed { formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); @@ -270,7 +311,7 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey fn preformat_field(field: &str, entry: &Entry) -> String { match field { "title" => { - 
sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("NA".into())) + sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("".into())) } "author" => { if let Ok(authors) = entry.author() { @@ -278,21 +319,20 @@ fn preformat_field(field: &str, entry: &Entry) -> String { for a in authors.iter() { last_names = last_names + &a.name + " "; } - dbg!(&last_names); last_names } else { - "NA".to_string() + "".to_string() } } "year" => { if let Ok(date) = entry.date() { date.to_chunks().format_verbatim()[..4].to_string() } else { - entry.get_as::(field).unwrap_or("NA".into()) + entry.get_as::(field).unwrap_or("".into()) } } "subtitle" => { - sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("NA".into())) + sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("".into())) } "editor" => { if let Ok(editors) = entry.editors() { @@ -304,22 +344,35 @@ fn preformat_field(field: &str, entry: &Entry) -> String { } last_names } else { - "NA".to_string() + "".to_string() } } "pubtype" | "entrytype" => entry.entry_type.to_string(), - _ => entry.get_as::(field).unwrap_or("Empty".into()), + _ => entry.get_as::(field).unwrap_or("".into()), } } /// Cut of word at char count index if its set -fn format_word(word: &str, count: Option) -> &str { +fn format_word(word: &str, count: Option) -> String { if let Some(len) = count && len < word.chars().count() { - &word[..len] + // Since chars can consist of multiple bytes, we need this more complex + // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...) 
+ // instead of simple byte indexing + let mut word_slice = String::new(); + let word_chars = word.chars(); + let mut counter = 0; + for c in word_chars { + if counter == len { + break; + } + word_slice.push(c); + counter += 1; + } + word_slice } else { - word + word.to_string() } } @@ -427,6 +480,7 @@ mod tests { ], case: None, old_new_keys_map: Vec::new(), + dry_run: false, }; let _ = formatting_struct.do_formatting(); assert_eq!( diff --git a/tests/test-config.toml b/tests/test-config.toml index fc447f1..b484b69 100644 --- a/tests/test-config.toml +++ b/tests/test-config.toml @@ -59,3 +59,7 @@ custom_column = "series" # author_color = "38" # title_color = "37" # year_color = "135" + +[citekey_formatter] +fields = [ "author;2;;-;_", "title;3;3;_;_", "year" ] +case = "lowercase" -- cgit v1.2.3 From c69b1789fabaf149916d160922d7026f2cbe33f1 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Fri, 10 Oct 2025 14:57:53 +0200 Subject: implement const of ignored special chars for citekey formatting * the list contains 33 special chars at the moment * it will only affect already existing special chars in biblatex fields * delimiter specified for citekey formatting are not affected * char count is also not affected, ignored chars are not counted --- src/bibiman/citekeys.rs | 40 +++++++++++++++++++++------------------- src/config.rs | 5 +++++ tests/test-config.toml | 2 +- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 5121741..7c06886 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -31,7 +31,10 @@ use owo_colors::{ }; use serde::{Deserialize, Serialize}; -use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig}; +use crate::{ + bibiman::sanitize::sanitize_single_string_fully, + config::{BibiConfig, IGNORED_SPECIAL_CHARS}, +}; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum CitekeyCase { @@ -354,26 +357,25 @@ fn preformat_field(field: &str, 
entry: &Entry) -> String { /// Cut of word at char count index if its set fn format_word(word: &str, count: Option) -> String { - if let Some(len) = count - && len < word.chars().count() - { - // Since chars can consist of multiple bytes, we need this more complex - // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...) - // instead of simple byte indexing - let mut word_slice = String::new(); - let word_chars = word.chars(); - let mut counter = 0; - for c in word_chars { - if counter == len { - break; - } - word_slice.push(c); - counter += 1; + // Since chars can consist of multiple bytes, we need this more complex + // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...) + // instead of simple byte indexing + let mut word_slice = String::new(); + let word_chars = word.chars(); + let mut counter = 0; + for c in word_chars { + if let Some(len) = count + && counter == len + { + break; } - word_slice - } else { - word.to_string() + if IGNORED_SPECIAL_CHARS.contains(&c) { + continue; + } + word_slice.push(c); + counter += 1; } + word_slice } /// Split a formatting pattern of kind diff --git a/src/config.rs b/src/config.rs index a5df61c..a4e89be 100644 --- a/src/config.rs +++ b/src/config.rs @@ -35,6 +35,11 @@ use crate::{ cliargs::CLIArgs, }; +pub const IGNORED_SPECIAL_CHARS: [char; 33] = [ + '?', '!', '\\', '\'', '.', '-', '–', ':', ',', '[', ']', '(', ')', '{', '}', '§', '$', '%', + '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"', +]; + const DEFAULT_CONFIG: &str = r##" # [general] ## Default files/dirs which are loaded on startup diff --git a/tests/test-config.toml b/tests/test-config.toml index b484b69..558d216 100644 --- a/tests/test-config.toml +++ b/tests/test-config.toml @@ -61,5 +61,5 @@ custom_column = "series" # year_color = "135" [citekey_formatter] -fields = [ "author;2;;-;_", "title;3;3;_;_", "year" ] +fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ] case = "lowercase" -- cgit v1.2.3 From 
418d2f3874c8e86c4b58143115ee3d4181130f9c Mon Sep 17 00:00:00 2001 From: lukeflo Date: Fri, 10 Oct 2025 15:09:48 +0200 Subject: add dry-run information to --help function --- src/bibiman/citekeys.rs | 11 +++++++++++ src/config.rs | 15 +++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 7c06886..f7704fb 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -238,6 +238,17 @@ fn formatting_help() { ", "-h, --help".fg::().bold() ), + formatdoc!( + " + \t{} + \tDon't apply any changes to the named files. Instead print all + \told citekeys and the formatted strings that would have been + \tapplied in the format: {} => {} + ", + "-d, --dry-run".fg::().bold(), + "old_key".italic(), + "new_key".bold() + ), formatdoc! {" \t{} \tThe bibfile for which the citekey formatting should be processed. diff --git a/src/config.rs b/src/config.rs index a4e89be..b1c4b07 100644 --- a/src/config.rs +++ b/src/config.rs @@ -103,6 +103,21 @@ const DEFAULT_CONFIG: &str = r##" # author_color = "38" # title_color = "37" # year_color = "135" + +# [citekey_formatter] +## Define the patterns for creating citekeys. Every item of the array consists of +## five components separated by semicolons. Despite the field name every component +## can be left blank: +## - name of the biblatex field ("author", "title"...) +## - number of max words from the given field +## - number of chars used from each word +## - delimiter to separate words of the same field +## - trailing delimiter separating the current field from the following +# fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ] + +## Convert chars to specified case. Possible values: +## "upper", "uppercase", "lower", "lowercase" +# case = "lowercase" "##; /// Main struct of the config file. 
Contains substructs/headings in toml -- cgit v1.2.3 From 211d556d6dc7132442c90b3ae6ac7485cf30ac5e Mon Sep 17 00:00:00 2001 From: lukeflo Date: Fri, 10 Oct 2025 18:22:26 +0200 Subject: trimming citekey if last field is empty: WIP --- src/bibiman/citekeys.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index f7704fb..1f16b48 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -273,7 +273,8 @@ fn formatting_help() { /// Build the citekey from the patterns defined in the config file fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String { let mut new_citekey = String::new(); - for pattern in pattern_fields.iter() { + let fields = pattern_fields.len(); + for (idx, pattern) in pattern_fields.iter().enumerate() { let (field, word_count, char_count, inner_delimiter, trailing_delimiter) = split_formatting_pat(pattern); let formatted_field_str = { -- cgit v1.2.3 From 49d9a57bd15565116a51380d3552201b4a2de57b Mon Sep 17 00:00:00 2001 From: lukeflo Date: Sun, 12 Oct 2025 14:05:47 +0200 Subject: pop trailing delimiter if last field is empty --- src/bibiman/citekeys.rs | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 1f16b48..065d57f 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -272,14 +272,27 @@ fn formatting_help() { /// Build the citekey from the patterns defined in the config file fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String { + // mut string the citekey is built from let mut new_citekey = String::new(); + + // count different fields of pattern vec let fields = pattern_fields.len(); + + // loop over pattern fields process them for (idx, pattern) in pattern_fields.iter().enumerate() { - let (field, word_count, char_count, inner_delimiter, trailing_delimiter) = + // parse 
single values from pattern field + let (field_name, word_count, char_count, inner_delimiter, trailing_delimiter) = split_formatting_pat(pattern); + + // built the part of the citekey from the current pattern field let formatted_field_str = { let mut formatted_str = String::new(); - let field = preformat_field(field, entry); + + // preformat the field depending on biblatex value + let field = preformat_field(field_name, entry); + + // split at whitespaces, count fields and set counter for processed + // splits let mut split_field = field.split_whitespace(); let mut words_passed = 0; let field_count = field.split_whitespace().count(); @@ -290,10 +303,19 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey } else { field_count }; + + // loop over single parts of current field and add correct delimiter loop { + // terminate loop for current field if its empty. If its also the + // last of the pattern vec, pop the trailing delimiter if field.is_empty() { + if idx + 1 == fields { + let _ = new_citekey.pop(); + } break; } + + // process the single slices and add correct delimiter if let Some(field_slice) = split_field.next() { formatted_str = formatted_str + &format_word(field_slice, char_count); words_passed += 1; -- cgit v1.2.3 From f112c4e13009e5ddfe3cf5c4cbe7f29f832b8553 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Sun, 12 Oct 2025 21:51:21 +0200 Subject: solve double delimiters with empty fields --- src/bibiman/citekeys.rs | 36 ++++++++++++++++++++++++++++++------ tests/test-config.toml | 2 +- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 065d57f..9d17403 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -317,13 +317,37 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey // process the single slices and add correct delimiter if let Some(field_slice) = split_field.next() { - formatted_str = formatted_str + 
&format_word(field_slice, char_count); - words_passed += 1; - if word_count == words_passed { - formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); - break; + // Create word slice char by char. We need to loop over chars + // instead of a simple bytes index to also catch chars which + // consist of more than one byte (äöüøæ etc...) + let mut word_slice = String::new(); + let word_chars = field_slice.chars(); + let mut counter = 0; + for c in word_chars { + if let Some(len) = char_count + && counter == len + { + break; + } + // if a word slice contains a special char, skip it + if IGNORED_SPECIAL_CHARS.contains(&c) { + continue; + } + word_slice.push(c); + counter += 1; + } + // Don't count empty slices and don't add delimiter to those + if !word_slice.is_empty() { + formatted_str = formatted_str + &word_slice; + words_passed += 1; + if word_count == words_passed { + formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); + break; + } else { + formatted_str = formatted_str + inner_delimiter.unwrap_or(""); + } } else { - formatted_str = formatted_str + inner_delimiter.unwrap_or("") + continue; } } else { formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); diff --git a/tests/test-config.toml b/tests/test-config.toml index 558d216..2c5ac96 100644 --- a/tests/test-config.toml +++ b/tests/test-config.toml @@ -61,5 +61,5 @@ custom_column = "series" # year_color = "135" [citekey_formatter] -fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ] +fields = [ "author;2;;-;_", "title;3;6;_;_", "year", "publisher;;5;#;" ] case = "lowercase" -- cgit v1.2.3 From 0a8805acfb6fbb3d3a8c22f4ccbaf692a73cddfb Mon Sep 17 00:00:00 2001 From: lukeflo Date: Sun, 12 Oct 2025 23:01:17 +0200 Subject: ignore list for words, but need to solve inner delimiter problem for words ignored --- src/bibiman/citekeys.rs | 317 ++------------------------------ src/bibiman/citekeys/citekey_utils.rs | 327 ++++++++++++++++++++++++++++++++++ tests/test-config.toml | 7 +- 3 
files changed, 348 insertions(+), 303 deletions(-) create mode 100644 src/bibiman/citekeys/citekey_utils.rs diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 9d17403..2f56947 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -21,27 +21,33 @@ use std::{ path::{Path, PathBuf}, }; -use biblatex::{Bibliography, ChunksExt, Entry, Type}; +use biblatex::Bibliography; use color_eyre::eyre::{OptionExt, eyre}; -use indoc::formatdoc; use lexopt::Arg::{Long, Short}; -use owo_colors::{ - OwoColorize, - colors::{BrightBlue, Green, White}, -}; +use owo_colors::OwoColorize; use serde::{Deserialize, Serialize}; use crate::{ - bibiman::sanitize::sanitize_single_string_fully, - config::{BibiConfig, IGNORED_SPECIAL_CHARS}, + bibiman::citekeys::citekey_utils::{build_citekey, formatting_help}, + config::BibiConfig, }; +mod citekey_utils; + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum CitekeyCase { #[serde(alias = "uppercase", alias = "upper")] Upper, #[serde(alias = "lowercase", alias = "lower")] Lower, + #[serde( + alias = "camel", + alias = "camelcase", + alias = "camel_case", + alias = "uppercamelcase", + alias = "upper_camel_case" + )] + Camel, } #[derive(Debug, Default, Clone)] @@ -203,306 +209,13 @@ impl CitekeyFormatting { } } -fn formatting_help() { - let help = vec![ - formatdoc!( - "{} {}\n", - env!("CARGO_PKG_NAME").fg::().bold(), - env!("CARGO_PKG_VERSION") - ), - formatdoc!("{}", "USAGE".bold()), - formatdoc!( - "\t{} {} {} {}\n", - env!("CARGO_PKG_NAME").fg::().bold(), - "format-citekeys".bold(), - "--source=".bold(), - "--output=".bold() - ), - formatdoc!( - " - \tThis help describes the CLI usage for the citekey formatting - \tfunctionality of bibiman. The definition of patterns how the - \tcitekeys should be formatted must be set in the config file. - \tFor further informations how to use this patterns etc. 
see: - \t{} - ", - "https://codeberg.org/lukeflo/bibiman/src/branch/main#bibiman" - .italic() - .fg::() - ), - formatdoc!("{}", "OPTIONS".bold()), - formatdoc!( - " - \t{} - \tShow this help and exit - ", - "-h, --help".fg::().bold() - ), - formatdoc!( - " - \t{} - \tDon't apply any changes to the named files. Instead print all - \told citekeys and the formatted strings that would have been - \tapplied in the format: {} => {} - ", - "-d, --dry-run".fg::().bold(), - "old_key".italic(), - "new_key".bold() - ), - formatdoc! {" - \t{} - \tThe bibfile for which the citekey formatting should be processed. - \tTakes a path as argument. - ", "-s, -f, --source=, --file=".fg::().bold()}, - formatdoc!( - " - \t{} - \tThe bibfile to which the updated content should be written. - \tTakes a path as argument. If the file doesn't exist, it will be - \tcreated. - \tIf the argument isn't used, the original file will be {}! - ", - "-t, -o, --target=, --output=".fg::().bold(), - "overwritten".italic(), - ), - ]; - let help = help.join("\n"); - println!("{}", help); -} - -/// Build the citekey from the patterns defined in the config file -fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String { - // mut string the citekey is built from - let mut new_citekey = String::new(); - - // count different fields of pattern vec - let fields = pattern_fields.len(); - - // loop over pattern fields process them - for (idx, pattern) in pattern_fields.iter().enumerate() { - // parse single values from pattern field - let (field_name, word_count, char_count, inner_delimiter, trailing_delimiter) = - split_formatting_pat(pattern); - - // built the part of the citekey from the current pattern field - let formatted_field_str = { - let mut formatted_str = String::new(); - - // preformat the field depending on biblatex value - let field = preformat_field(field_name, entry); - - // split at whitespaces, count fields and set counter for processed - // splits - let mut 
split_field = field.split_whitespace(); - let mut words_passed = 0; - let field_count = field.split_whitespace().count(); - let word_count = if let Some(val) = word_count - && val <= field_count - { - val - } else { - field_count - }; - - // loop over single parts of current field and add correct delimiter - loop { - // terminate loop for current field if its empty. If its also the - // last of the pattern vec, pop the trailing delimiter - if field.is_empty() { - if idx + 1 == fields { - let _ = new_citekey.pop(); - } - break; - } - - // process the single slices and add correct delimiter - if let Some(field_slice) = split_field.next() { - // Create word slice char by char. We need to loop over chars - // instead of a simple bytes index to also catch chars which - // consist of more than one byte (äöüøæ etc...) - let mut word_slice = String::new(); - let word_chars = field_slice.chars(); - let mut counter = 0; - for c in word_chars { - if let Some(len) = char_count - && counter == len - { - break; - } - // if a word slice contains a special char, skip it - if IGNORED_SPECIAL_CHARS.contains(&c) { - continue; - } - word_slice.push(c); - counter += 1; - } - // Don't count empty slices and don't add delimiter to those - if !word_slice.is_empty() { - formatted_str = formatted_str + &word_slice; - words_passed += 1; - if word_count == words_passed { - formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); - break; - } else { - formatted_str = formatted_str + inner_delimiter.unwrap_or(""); - } - } else { - continue; - } - } else { - formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); - break; - }; - } - formatted_str - }; - new_citekey = new_citekey + &formatted_field_str; - } - if let Some(case_format) = case { - match case_format { - CitekeyCase::Lower => new_citekey.to_lowercase(), - CitekeyCase::Upper => new_citekey.to_uppercase(), - } - } else { - new_citekey - } -} - -/// Preformat some fields which are very common to be used in citekeys -fn 
preformat_field(field: &str, entry: &Entry) -> String { - match field { - "title" => { - sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("".into())) - } - "author" => { - if let Ok(authors) = entry.author() { - let mut last_names = String::new(); - for a in authors.iter() { - last_names = last_names + &a.name + " "; - } - last_names - } else { - "".to_string() - } - } - "year" => { - if let Ok(date) = entry.date() { - date.to_chunks().format_verbatim()[..4].to_string() - } else { - entry.get_as::(field).unwrap_or("".into()) - } - } - "subtitle" => { - sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("".into())) - } - "editor" => { - if let Ok(editors) = entry.editors() { - let mut last_names = String::new(); - for editortypes in editors.iter() { - for e in editortypes.0.iter() { - last_names = last_names + &e.name + " "; - } - } - last_names - } else { - "".to_string() - } - } - "pubtype" | "entrytype" => entry.entry_type.to_string(), - _ => entry.get_as::(field).unwrap_or("".into()), - } -} - -/// Cut of word at char count index if its set -fn format_word(word: &str, count: Option) -> String { - // Since chars can consist of multiple bytes, we need this more complex - // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...) 
- // instead of simple byte indexing - let mut word_slice = String::new(); - let word_chars = word.chars(); - let mut counter = 0; - for c in word_chars { - if let Some(len) = count - && counter == len - { - break; - } - if IGNORED_SPECIAL_CHARS.contains(&c) { - continue; - } - word_slice.push(c); - counter += 1; - } - word_slice -} - -/// Split a formatting pattern of kind -/// `;;;;`, -/// e.g.: `title;3;3;_;:` will give `("title", 3, 3, "_", ":")` -fn split_formatting_pat( - pattern: &str, -) -> ( - &str, - Option, - Option, - Option<&str>, - Option<&str>, -) { - let mut splits = pattern.split(';'); - ( - splits - .next() - .expect("Need field value for formatting citekey"), - if let Some(next) = splits.next() - && next.len() > 0 - { - next.parse::().ok() - } else { - None - }, - if let Some(next) = splits.next() - && next.len() > 0 - { - next.parse::().ok() - } else { - None - }, - splits.next(), - splits.next(), - ) -} - #[cfg(test)] mod tests { use std::path::PathBuf; use biblatex::Bibliography; - use itertools::Itertools; - - use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting, split_formatting_pat}; - - #[test] - fn split_citekey_pattern() { - let pattern = "title;3;5;_;_"; - - assert_eq!( - split_formatting_pat(pattern), - ("title", Some(3), Some(5), Some("_"), Some("_")) - ); - let pattern = "year"; - - assert_eq!( - split_formatting_pat(pattern), - ("year", None, None, None, None) - ); - - let pattern = "author;1;;;_"; - assert_eq!( - split_formatting_pat(pattern), - ("author", Some(1), None, Some(""), Some("_")) - ); - } + use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting}; #[test] fn format_citekey_test() { diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs new file mode 100644 index 0000000..ee2c849 --- /dev/null +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -0,0 +1,327 @@ +// bibiman - a TUI for managing BibLaTeX databases +// Copyright (C) 2025 lukeflo +// +// This program is free 
software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +///// + +use biblatex::{ChunksExt, Entry, Type}; +use indoc::formatdoc; +use owo_colors::{ + OwoColorize, + colors::{BrightBlue, Green, White}, +}; + +use crate::{ + bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully}, + config::IGNORED_SPECIAL_CHARS, +}; + +const IGNORE_WORDS: [&str; 20] = [ + "the", "a", "an", "of", "for", "in", "at", "to", "and", "der", "die", "das", "ein", "eine", + "eines", "des", "auf", "und", "für", "vor", +]; + +pub(super) fn formatting_help() { + let help = vec![ + formatdoc!( + "{} {}\n", + env!("CARGO_PKG_NAME").fg::().bold(), + env!("CARGO_PKG_VERSION") + ), + formatdoc!("{}", "USAGE".bold()), + formatdoc!( + "\t{} {} {} {}\n", + env!("CARGO_PKG_NAME").fg::().bold(), + "format-citekeys".bold(), + "--source=".bold(), + "--output=".bold() + ), + formatdoc!( + " + \tThis help describes the CLI usage for the citekey formatting + \tfunctionality of bibiman. The definition of patterns how the + \tcitekeys should be formatted must be set in the config file. + \tFor further informations how to use this patterns etc. see: + \t{} + ", + "https://codeberg.org/lukeflo/bibiman/src/branch/main#bibiman" + .italic() + .fg::() + ), + formatdoc!("{}", "OPTIONS".bold()), + formatdoc!( + " + \t{} + \tShow this help and exit + ", + "-h, --help".fg::().bold() + ), + formatdoc!( + " + \t{} + \tDon't apply any changes to the named files. 
Instead print all + \told citekeys and the formatted strings that would have been + \tapplied in the format: {} => {} + ", + "-d, --dry-run".fg::().bold(), + "old_key".italic(), + "new_key".bold() + ), + formatdoc! {" + \t{} + \tThe bibfile for which the citekey formatting should be processed. + \tTakes a path as argument. + ", "-s, -f, --source=, --file=".fg::().bold()}, + formatdoc!( + " + \t{} + \tThe bibfile to which the updated content should be written. + \tTakes a path as argument. If the file doesn't exist, it will be + \tcreated. + \tIf the argument isn't used, the original file will be {}! + ", + "-t, -o, --target=, --output=".fg::().bold(), + "overwritten".italic(), + ), + ]; + let help = help.join("\n"); + println!("{}", help); +} + +/// Build the citekey from the patterns defined in the config file +pub(super) fn build_citekey( + entry: &Entry, + pattern_fields: &[String], + case: Option<&CitekeyCase>, +) -> String { + // mut string the citekey is built from + let mut new_citekey = String::new(); + + // trailing delimiter of previous field + let mut trailing_delimiter: Option<&str> = None; + + // loop over pattern fields process them + 'field_loop: for pattern in pattern_fields.iter() { + // parse single values from pattern field + let (field_name, word_count, char_count, inner_delimiter, cur_trailing_delimiter) = + split_formatting_pat(pattern); + + // built the part of the citekey from the current pattern field + let formatted_field_str = { + let mut formatted_str = String::new(); + + // preformat the field depending on biblatex value + let field = preformat_field(field_name, entry); + + // split at whitespaces, count fields and set counter for processed + // splits + let mut split_field = field.split_whitespace(); + let mut words_passed = 0; + let field_count = field.split_whitespace().count(); + let word_count = if let Some(val) = word_count + && val <= field_count + { + val + } else { + field_count + }; + + // If there is a trailing delimiter from 
the previous field, push it + if let Some(del) = trailing_delimiter { + formatted_str = del.to_string(); + }; + + // If the current field isn't empty, set trailing delimiter for + // upcoming loop repitition. If it's empty, start next run of loop + // directly + if !field.is_empty() { + trailing_delimiter = cur_trailing_delimiter; + } else { + continue 'field_loop; + } + + // loop over single parts of current field and add correct delimiter + 'word_loop: loop { + // process the single slices and add correct delimiter + if let Some(field_slice) = split_field.next() { + // Create word slice char by char. We need to loop over chars + // instead of a simple bytes index to also catch chars which + // consist of more than one byte (äöüøæ etc...) + let mut word_slice = String::new(); + let word_chars = field_slice.chars(); + let mut counter = 0; + 'char_loop: for mut c in word_chars { + // If camelcase is set, force first char of word to uppercase + if counter == 0 && case == Some(&CitekeyCase::Camel) { + c = c.to_ascii_uppercase() + } + if let Some(len) = char_count + && counter == len + { + break 'char_loop; + } + // if a word slice contains a special char, skip it + if IGNORED_SPECIAL_CHARS.contains(&c) { + continue 'char_loop; + } + word_slice.push(c); + counter += 1; + } + // Don't count empty slices and don't add delimiter to those + if !word_slice.is_empty() { + formatted_str = formatted_str + &word_slice; + words_passed += 1; + if word_count == words_passed { + break 'word_loop; + } else { + formatted_str = formatted_str + inner_delimiter.unwrap_or(""); + } + } else { + continue 'word_loop; + } + } else { + break 'word_loop; + }; + } + formatted_str + }; + new_citekey = new_citekey + &formatted_field_str; + } + match case { + Some(CitekeyCase::Lower) => new_citekey.to_lowercase(), + Some(CitekeyCase::Upper) => new_citekey.to_uppercase(), + _ => new_citekey, + } +} + +/// Preformat some fields which are very common to be used in citekeys +pub(super) fn 
preformat_field(field: &str, entry: &Entry) -> String { + match field { + // Sanitize all macro code from string + "title" => { + sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("".into())) + } + // Get author names. Fall back to editors before setting empty string + "author" => { + if let Ok(authors) = entry.author() { + let mut last_names = String::new(); + for a in authors.iter() { + last_names = last_names + &a.name + " "; + } + last_names + } else if let Ok(editors) = entry.editors() { + let mut last_names = String::new(); + for editortypes in editors.iter() { + for e in editortypes.0.iter() { + last_names = last_names + &e.name + " "; + } + } + last_names + } else { + "".to_string() + } + } + // Get year of date field, fallback to year field + "year" => { + if let Ok(date) = entry.date() { + date.to_chunks().format_verbatim()[..4].to_string() + } else { + entry.get_as::(field).unwrap_or("".into()) + } + } + // Sanitize all macro code from string + "subtitle" => { + sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("".into())) + } + "editor" => { + if let Ok(editors) = entry.editors() { + let mut last_names = String::new(); + for editortypes in editors.iter() { + for e in editortypes.0.iter() { + last_names = last_names + &e.name + " "; + } + } + last_names + } else { + "".to_string() + } + } + "pubtype" | "entrytype" => entry.entry_type.to_string(), + _ => entry.get_as::(field).unwrap_or("".into()), + } +} + +/// Split a formatting pattern of kind +/// `;;;;`, +/// e.g.: `title;3;3;_;:` will give `("title", 3, 3, "_", ":")` +pub(super) fn split_formatting_pat( + pattern: &str, +) -> ( + &str, + Option, + Option, + Option<&str>, + Option<&str>, +) { + let mut splits = pattern.split(';'); + ( + splits + .next() + .expect("Need field value for formatting citekey"), + if let Some(next) = splits.next() + && next.len() > 0 + { + next.parse::().ok() + } else { + None + }, + if let Some(next) = splits.next() + && next.len() > 0 + { + 
next.parse::().ok() + } else { + None + }, + splits.next(), + splits.next(), + ) +} + +#[cfg(test)] +mod test { + use crate::bibiman::citekeys::citekey_utils::split_formatting_pat; + + #[test] + fn split_citekey_pattern() { + let pattern = "title;3;5;_;_"; + + assert_eq!( + split_formatting_pat(pattern), + ("title", Some(3), Some(5), Some("_"), Some("_")) + ); + + let pattern = "year"; + + assert_eq!( + split_formatting_pat(pattern), + ("year", None, None, None, None) + ); + + let pattern = "author;1;;;_"; + assert_eq!( + split_formatting_pat(pattern), + ("author", Some(1), None, Some(""), Some("_")) + ); + } +} diff --git a/tests/test-config.toml b/tests/test-config.toml index 2c5ac96..d3e42c5 100644 --- a/tests/test-config.toml +++ b/tests/test-config.toml @@ -61,5 +61,10 @@ custom_column = "series" # year_color = "135" [citekey_formatter] -fields = [ "author;2;;-;_", "title;3;6;_;_", "year", "publisher;;5;#;" ] +fields = ["entrytype;;;;:", "author;2;;-;_", "title;3;6;_;_", "year", "publisher;;5;#;" ] +# fields = [ # CamelCase test +# "author;2;;;", +# "title;5;5;;", +# "year" +# ] case = "lowercase" -- cgit v1.2.3 From 467851007e1861834326deee3116aa88fe839f5a Mon Sep 17 00:00:00 2001 From: lukeflo Date: Mon, 13 Oct 2025 15:45:53 +0200 Subject: Working proof of concept of citekey formatting --- CITEKEYS.md | 215 +++++++++++++++ Cargo.lock | 7 + Cargo.toml | 1 + README.md | 20 ++ src/bibiman/citekeys.rs | 69 +++-- src/bibiman/citekeys/citekey_utils.rs | 105 ++++---- src/config.rs | 69 +++++ tests/biblatex-test-citekeys.bib | 476 ++++++++++++++++++++++++++++++++++ tests/test-config.toml | 5 +- 9 files changed, 889 insertions(+), 78 deletions(-) create mode 100644 CITEKEYS.md create mode 100644 tests/biblatex-test-citekeys.bib diff --git a/CITEKEYS.md b/CITEKEYS.md new file mode 100644 index 0000000..912326a --- /dev/null +++ b/CITEKEYS.md @@ -0,0 +1,215 @@ +# Formatting Citekeys + + + +- [Formatting Citekeys](#formatting-citekeys) + - [Settings](#settings) + - 
[Building Patterns](#building-patterns) + - [Ignore Lists and Char Case](#ignore-lists-and-char-case) + - [General Tipps](#general-tipps) + - [Examples](#examples) + + + +`bibiman` offers the possibility to create new citekeys from the fields of +BibLaTeX entries. This is done using an easy but powerful pattern-matching +syntax. + +## Settings + +All settings for the citekey generation have to be configured in the used config +file. The regular path is `XDG_CONFIG_DIR/bibiman/bibiman.toml`. But it can be +set dynamically with the `-c`/`--config=` global option. + +Following values can be set through the config file. A detailed explanation for +all fields follows below: + +```toml +[citekey_formatter] +fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ] +case = "lowercase" +ascii_only = true +ignored_chars = [ + "?", "!", "\\", "\'", ".", "-", "–", ":", ",", "[", "]", "(", ")", "{", "}", "§", "$", "%", "&", "/", "`", "´", "#", "+", "*", "=", "|", "<", ">", "^", "°", "_", "\"", +] +ignored_words = [ + "the", + "a", + "an", + "of", + "for", + "in", + "at", + "to", + "and", + "der", + "die", + "das", + "ein", + "eine", + "eines", + "des", + "auf", + "und", + "für", + "vor", +] +``` + +## Building Patterns + +The main aspect for generating citekeys are the field patterns. They can be set +through an array in the config file where every array-item represents a single +BibLaTeX field to be used for generating a part of the citekey. + +Every field pattern consists of the following five parts separated by +semicolons. The general pattern looks like this (every subfield is explained +below): + +*biblatex field name* **;** *max word count* **;** *max char count* **;** *inner delimiter* **;** *trailing delimiter* + +- **BibLaTeX field**: the first part represents the field name which value + should be used to generate the content part of the citekey. Theoretically, any + BibLaTeX field can be selected by name. 
But there are some fields which are + much more common than others; e.g. `author`, `editor`, `title`, `year`/`date` + or `entrytype`. Those very common fields are preprocessed; meaning that for + instance LaTeX macros are fully stripped from the strings, or that `editor` is + a fallback value for `author` if the latter is empty (however, setting + `editor` explicitly is still possible). Also using `year` will parse the + `date` field too, to ensure a year number. +- **Max Word**: Defines how many words should maximal be used from the named + field. E.g. if the title consists of five words, and the max counter is set to + `3` only the first three fields will be used. +- **Max Chars/Word**: Defines how many chars, counting from the start, of each + word will be used to build the citekey. If for instance the value is set to + `5`, only the first five chars of any word will be used. Thus, "archaeology" + would be stripped down to "archa". +- **Inner Delimiter**: Sets the delimiter char used between words from the + currently named field; e.g. to separate the words of the `title` field. +- **Trailing Delimiter**: Sets the delimiter which separates the current fields + value from the following. This delimiter is only printed if the following + field has some content. + +For example, to use the `title` field, print maximal three words and of those +only the first five chars, single words separated by underscore and the whole +field separated by equal sign, insert the following pattern field into the +`fields` array: + +`title;3;5;_;=` + +Except the BibLaTeX field name, all other parts of the pattern can be left +blank. If the field name is the only value set, semicolon delimiters are also +not necessary. But if only one of the following parts should be set, all +delimiters need to be used. E.g. those are both valid: `title` or `title;;;_;=`. +The first would print all words of the title, no matter the length, not +separated by any char. 
The last would also print all words of the title, but +single words separated by underscores and the whole pattern value separated from +the following by an equal sign. This is not valid: `title;;_` since `bibiman` +can't know if the underscore means a delimiter (and which) or the max char +count. + +The pattern array inside the config file takes multiple pattern fields like the +preceding one. This allows an elaborate citekey pattern which takes into account +multiple fields. + +## Ignore Lists and Char Case + +Besides the field patterns there are some other options to define how citekeys +should be built. + +`ascii_only=` +: If set to `true`, which is the default, non-ascii chars are mapped to their + ascii equivalent. For example, the German `ä` would be mapped to `a`. The + Turkish `ş` or Greek `σ`/`ς` would be mapped to `s`. If set to `false` all are + kept as they are. But this could lead to errors running LaTeX on the file. + +`case=` +: If used, sets the case of the chars in the citekey. Valid values are + `uppercase`, `lowercase` or `camelcase`. The first two should be clear, the + latter means typical camel case also beginning the *first word* with an + uppercase letter; also referred to as upper camel case or Pascal case. + +`ignored_chars=` +: Defines chars which should be ignored during parsing (meaning they are not printed). + The default list contains 33 special chars and is part of the default config + file (in commented-out state). Be aware, setting this key will completely + overwrite the default list! + +`ignored_words=` +: A list of words which should be ignored when parsing field values. The default list + contains about 20 very commonly used words in English and German; like + articles, pronouns or connector words. Like with `ignored_chars` setting this + key will completely overwrite the default list! + +## General Tipps + +- Most importantly: *always use the **`--dry-run`** option first*!
This will + print a list of old and new values for all citekeys in the file without + changing anything. +- After finding a good overall pattern, *use the `--output=` option* to create a + new file and don't overwrite your existing file. Thus, your original file + isn't broken if the key formatter produces some unwanted output. +- Even though very long patterns are possible, they are not encouraged, since they bloat + the bibfiles. +- The same applies to *too short* patterns; if the pattern is too unspecific, + it bears the risk of producing duplicates (e.g. single author and year only). + But the citekey generator will not check for duplicates! +- It is possible to keep special chars and use them as delimiters. But this + might cause problems with other programs and CLI tools in particular, since many + special chars are reserved for shell operations. For instance, it will very + likely break the note file feature of `bibiman` which doesn't accept many + special chars. + +## Examples + +To make the process clearer, a few examples might help. The following bibfile is +assumed: + +```latex +@article{Bos2023, + title = {{LaTeX}, metadata, and publishing workflows}, + author = {Bos, Joppe W. and {McCurley}, Kevin S.}, + year = {2023}, + month = apr, + journal = {arXiv}, + number = {{arXiv}:2301.08277}, + doi = {10.48550/arXiv.2301.08277}, + url = {http://arxiv.org/abs/2301.08277}, + urldate = {2023-08-22}, + note = {type: article}, +} +@book{Bhambra2021, + title = {Colonialism and \textbf{Modern Social Theory}}, + author = {Bhambra, Gurminder K.
and Holmwood, John}, + location = {Cambridge and Medford}, + publisher = {Polity Press}, + date = {2021}, + +``` + +And the following values set in the config file: + +```toml +fields = [ + # Just print the whole entrytype and a colon as trailing delimiter + "entrytype;;;;:", + # Print all author names in full length, names separated by dash, + # the whole field by underscore + "author;;;-;_", + # Print first 4 words of title, first 3 chars of every word only. Title words + # separated by equal sign, the whole field by underscore + "title;4;3;=;_", + # Print all words of location, but only first 4 chars of every word. Single words + # separated by colon, whole field by underscore + "location;;4;:;_", + # Just print the whole year + "year", +] +case = "lowercase" +ascii_only = true +``` + +The combination of those setting will produce the following citekeys: + +- **`article:bos-mccurley_lat=met=pub=wor_2023`** +- **`book:bhambra-holmwood_col=mod=soc=the_camb:medf_2021`** diff --git a/Cargo.lock b/Cargo.lock index a27636e..0adb4e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -103,6 +103,7 @@ dependencies = [ "biblatex", "color-eyre", "crossterm", + "deunicode", "dirs", "editor-command", "figment", @@ -323,6 +324,12 @@ dependencies = [ "syn", ] +[[package]] +name = "deunicode" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" + [[package]] name = "dirs" version = "5.0.1" diff --git a/Cargo.toml b/Cargo.toml index abf1eee..0c07c51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ owo-colors = "4.2.2" logos = "0.15.1" phf = { version = "0.13.1", features = ["macros"] } indoc = "2.0.6" +deunicode = "1.6.2" [workspace.metadata.cross.target.aarch64-unknown-linux-gnu] # Install libssl-dev:arm64, see diff --git a/README.md b/README.md index 4929509..3fb81c8 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,11 @@ - [Ubuntu/Debian](#ubuntudebian) - [Void 
Linux](#void-linux) - [Usage](#usage) + - [CLI for citekey formatting](#cli-for-citekey-formatting) - [Configuration](#configuration) - [Location of Config File](#location-of-config-file) - [General Configuration](#general-configuration) + - [Citekey formatting](#citekey-formatting) - [Color Configuration](#color-configuration) - [Features](#features) - [Keybindings](#keybindings) @@ -196,6 +198,13 @@ bibman tests/multi-files/ bibiman tests/biblatex-test.bib tests/multi-files/ ``` +### CLI for citekey formatting + +Besides the TUI, `bibiman` can format and replace citekeys. To make use of this +feature, run the program with the `format-citekeys` subcommand. For more +information on this, use `bibiman format-citekeys --help` and the +[docs](./CITEKEYS.md). + ## Configuration ### Location of Config File @@ -268,6 +277,11 @@ note_symbol = "󰧮" ## Possible values are "journaltitle", "organization", "instituion", "publisher" ## and "pubtype" (which is the default) custom_column = "pubtype" + +[citekey_formatter] +fields = [] +ascii_only = true +case = "lowercase" ``` `bibfiles` @@ -326,6 +340,12 @@ custom_column = "pubtype" good advice to use a rather wide terminal window when using a value like `journaltitle`. +### Citekey formatting + +`bibiman` now also offers a citekey generating feature. This makes it possible to reformat +all citekeys based on an elaborate pattern matching syntax. For further +information and examples see the [docs](CITEKEYS.md). + ### Color Configuration Furthermore, it is now possible to customize the colors. 
The following values diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 2f56947..0cec28e 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -29,7 +29,7 @@ use serde::{Deserialize, Serialize}; use crate::{ bibiman::citekeys::citekey_utils::{build_citekey, formatting_help}, - config::BibiConfig, + config::{BibiConfig, IGNORED_SPECIAL_CHARS, IGNORED_WORDS}, }; mod citekey_utils; @@ -60,6 +60,7 @@ pub(crate) struct CitekeyFormatting { case: Option, old_new_keys_map: Vec<(String, String)>, dry_run: bool, + ascii_only: bool, } impl CitekeyFormatting { @@ -69,14 +70,15 @@ impl CitekeyFormatting { ) -> color_eyre::Result<()> { let mut formatter = CitekeyFormatting::default(); - formatter.fields = cfg - .citekey_formatter - .fields - .clone() - .ok_or_eyre("Need to define fields correctly in config file")?; + formatter.fields = cfg.citekey_formatter.fields.clone().ok_or_eyre(format!( + "Need to define {} correctly in config file", + "citekey pattern fields".red() + ))?; formatter.case = cfg.citekey_formatter.case.clone(); + formatter.ascii_only = cfg.citekey_formatter.ascii_only; + if formatter.fields.is_empty() { return Err(eyre!( "To format all citekeys, you need to provide {} values in the config file", @@ -105,13 +107,26 @@ impl CitekeyFormatting { formatter.bib_entries = Bibliography::parse(&bibstring) .map_err(|e| eyre!("Couldn't parse bibfile due to {}", e.kind))?; + let ignored_chars = if let Some(chars) = &cfg.citekey_formatter.ignored_chars { + chars.as_slice() + } else { + IGNORED_SPECIAL_CHARS.as_slice() + }; + + let ignored_words = if let Some(words) = &cfg.citekey_formatter.ignored_words { + words.as_slice() + } else { + &*IGNORED_WORDS.as_slice() + }; + formatter - .do_formatting() + .do_formatting(ignored_chars, ignored_words) .rev_sort_new_keys_by_len() .update_file()?; Ok(()) } + /// Start Citekey formatting with building a new instance of `CitekeyFormatting` /// Formatting is processed file by file, because `bibman` 
can handle /// multi-file setups. @@ -144,16 +159,24 @@ impl CitekeyFormatting { case: cfg.citekey_formatter.case.clone(), old_new_keys_map: Vec::new(), dry_run: false, + ascii_only: cfg.citekey_formatter.ascii_only, }) } /// Process the actual formatting. The citekey of every entry will be updated. - pub fn do_formatting(&mut self) -> &mut Self { + pub fn do_formatting(&mut self, ignored_chars: &[char], ignored_words: &[String]) -> &mut Self { let mut old_new_keys: Vec<(String, String)> = Vec::new(); for entry in self.bib_entries.iter() { old_new_keys.push(( entry.key.clone(), - build_citekey(entry, &self.fields, self.case.as_ref()), + build_citekey( + entry, + &self.fields, + self.case.as_ref(), + self.ascii_only, + ignored_chars, + ignored_words, + ), )); } @@ -215,12 +238,15 @@ mod tests { use biblatex::Bibliography; - use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting}; + use crate::{ + bibiman::citekeys::{CitekeyCase, CitekeyFormatting}, + config::{IGNORED_SPECIAL_CHARS, IGNORED_WORDS}, + }; #[test] fn format_citekey_test() { let src = r" - @article{bos_latex_metadata_and_publishing_workflows_2023, + @article{Bos2023, title = {{LaTeX}, metadata, and publishing workflows}, author = {Bos, Joppe W. and {McCurley}, Kevin S.}, year = {2023}, @@ -232,7 +258,7 @@ mod tests { urldate = {2023-08-22}, note = {type: article}, } - @book{bhambra_colonialism_social_theory_2021, + @book{Bhambra2021, title = {Colonialism and \textbf{Modern Social Theory}}, author = {Bhambra, Gurminder K. 
and Holmwood, John}, location = {Cambridge and Medford}, @@ -247,29 +273,24 @@ mod tests { fields: vec![ "entrytype;;;;:".into(), "author;;;-;_".into(), - "title;4;3;_;_".into(), + "title;4;3;=;_".into(), "location;;4;:;_".into(), "year".into(), ], - case: None, + case: Some(CitekeyCase::Lower), old_new_keys_map: Vec::new(), dry_run: false, + ascii_only: true, }; - let _ = formatting_struct.do_formatting(); + let _ = formatting_struct + .do_formatting(IGNORED_SPECIAL_CHARS.as_slice(), &*IGNORED_WORDS.as_slice()); assert_eq!( formatting_struct.old_new_keys_map.get(0).unwrap().1, - "article:Bos-McCurley_LaT_met_and_pub_Empt_2023" + "article:bos-mccurley_lat=met=pub=wor_2023" ); assert_eq!( formatting_struct.old_new_keys_map.get(1).unwrap().1, - "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021" - ); - formatting_struct.case = Some(CitekeyCase::Lower); - let _ = formatting_struct.do_formatting().rev_sort_new_keys_by_len(); - // now the longer citekey is processed first and its in lowercase! 
- assert_eq!( - formatting_struct.old_new_keys_map.get(0).unwrap().1, - "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021" + "book:bhambra-holmwood_col=mod=soc=the_camb:medf_2021" ); } diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs index ee2c849..5f70224 100644 --- a/src/bibiman/citekeys/citekey_utils.rs +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -16,21 +16,14 @@ ///// use biblatex::{ChunksExt, Entry, Type}; +use deunicode::deunicode; use indoc::formatdoc; use owo_colors::{ OwoColorize, colors::{BrightBlue, Green, White}, }; -use crate::{ - bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully}, - config::IGNORED_SPECIAL_CHARS, -}; - -const IGNORE_WORDS: [&str; 20] = [ - "the", "a", "an", "of", "for", "in", "at", "to", "and", "der", "die", "das", "ein", "eine", - "eines", "des", "auf", "und", "für", "vor", -]; +use crate::bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully}; pub(super) fn formatting_help() { let help = vec![ @@ -104,6 +97,9 @@ pub(super) fn build_citekey( entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>, + ascii_only: bool, + ignored_chars: &[char], + ignored_words: &[String], ) -> String { // mut string the citekey is built from let mut new_citekey = String::new(); @@ -114,7 +110,7 @@ pub(super) fn build_citekey( // loop over pattern fields process them 'field_loop: for pattern in pattern_fields.iter() { // parse single values from pattern field - let (field_name, word_count, char_count, inner_delimiter, cur_trailing_delimiter) = + let (field_name, max_words, max_chars, inner_delimiter, cur_trailing_delimiter) = split_formatting_pat(pattern); // built the part of the citekey from the current pattern field @@ -126,16 +122,9 @@ pub(super) fn build_citekey( // split at whitespaces, count fields and set counter for processed // splits - let mut split_field = field.split_whitespace(); + let split_field = field.split_whitespace(); let mut 
words_passed = 0; let field_count = field.split_whitespace().count(); - let word_count = if let Some(val) = word_count - && val <= field_count - { - val - } else { - field_count - }; // If there is a trailing delimiter from the previous field, push it if let Some(del) = trailing_delimiter { @@ -152,47 +141,57 @@ pub(super) fn build_citekey( } // loop over single parts of current field and add correct delimiter - 'word_loop: loop { - // process the single slices and add correct delimiter - if let Some(field_slice) = split_field.next() { - // Create word slice char by char. We need to loop over chars - // instead of a simple bytes index to also catch chars which - // consist of more than one byte (äöüøæ etc...) - let mut word_slice = String::new(); - let word_chars = field_slice.chars(); - let mut counter = 0; - 'char_loop: for mut c in word_chars { - // If camelcase is set, force first char of word to uppercase - if counter == 0 && case == Some(&CitekeyCase::Camel) { - c = c.to_ascii_uppercase() - } - if let Some(len) = char_count - && counter == len - { - break 'char_loop; - } - // if a word slice contains a special char, skip it - if IGNORED_SPECIAL_CHARS.contains(&c) { - continue 'char_loop; - } + // process the single slices and add correct delimiter + 'word_loop: for (idx, field_slice) in split_field.enumerate() { + // if the current slice is a common word from the ignore list, + // skip it. + if ignored_words.contains(&field_slice.to_lowercase()) { + continue; + } + + // Create word slice char by char. We need to loop over chars + // instead of a simple bytes index to also catch chars which + // consist of more than one byte (äöüøæ etc...) 
+ let mut word_slice = String::new(); + let word_chars = field_slice.chars(); + let mut counter = 0; + 'char_loop: for mut c in word_chars { + // If camelcase is set, force first char of word to uppercase + if counter == 0 && case == Some(&CitekeyCase::Camel) { + c = c.to_ascii_uppercase() + } + if let Some(len) = max_chars + && counter >= len + { + break 'char_loop; + } + // if a word slice contains a special char, skip it + if ignored_chars.contains(&c) { + continue 'char_loop; + } + // if non-ascii chars should be mapped, check if needed and do it + if let Some(chars) = deunicode::deunicode_char(c) + && ascii_only + { + word_slice.push_str(chars); + counter += chars.len(); + } else { word_slice.push(c); counter += 1; } - // Don't count empty slices and don't add delimiter to those - if !word_slice.is_empty() { - formatted_str = formatted_str + &word_slice; - words_passed += 1; - if word_count == words_passed { - break 'word_loop; - } else { - formatted_str = formatted_str + inner_delimiter.unwrap_or(""); - } + } + // Don't count empty slices and don't add delimiter to those + if !word_slice.is_empty() { + formatted_str = formatted_str + &word_slice; + words_passed += 1; + if max_words.is_some_and(|max| max == words_passed) || idx + 1 == field_count { + break 'word_loop; } else { - continue 'word_loop; + formatted_str = formatted_str + inner_delimiter.unwrap_or(""); } } else { - break 'word_loop; - }; + continue 'word_loop; + } } formatted_str }; diff --git a/src/config.rs b/src/config.rs index b1c4b07..7c1a0f8 100644 --- a/src/config.rs +++ b/src/config.rs @@ -20,6 +20,7 @@ use std::{ io::{Write, stdin}, path::PathBuf, str::FromStr, + sync::LazyLock, }; use color_eyre::{eyre::Result, owo_colors::OwoColorize}; @@ -40,6 +41,31 @@ pub const IGNORED_SPECIAL_CHARS: [char; 33] = [ '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"', ]; +pub static IGNORED_WORDS: LazyLock> = LazyLock::new(|| { + vec![ + String::from("the"), + String::from("a"), + 
String::from("an"), + String::from("of"), + String::from("for"), + String::from("in"), + String::from("at"), + String::from("to"), + String::from("and"), + String::from("der"), + String::from("die"), + String::from("das"), + String::from("ein"), + String::from("eine"), + String::from("eines"), + String::from("des"), + String::from("auf"), + String::from("und"), + String::from("für"), + String::from("vor"), + ] +}); + const DEFAULT_CONFIG: &str = r##" # [general] ## Default files/dirs which are loaded on startup @@ -118,6 +144,40 @@ const DEFAULT_CONFIG: &str = r##" ## Convert chars to specified case. Possible values: ## "upper", "uppercase", "lower", "lowercase" # case = "lowercase" + +## Map all unicode chars to their pure ascii equivalent +# ascii_only = true + +## List of special chars that'll be ignored when building citekeys. +## A custom list will overwrite the default list +# ignored_chars = [ +# "?", "!", "\\", "\'", ".", "-", "–", ":", ",", "[", "]", "(", ")", "{", "}", "§", "$", "%", "&", "/", "`", "´", "#", "+", "*", "=", "|", "<", ">", "^", "°", "_", """, +# ] + +## List of words that'll be ignored when building citekeys. +## A custom list will overwrite the default list +# ignored_words = [ +# "the", +# "a", +# "an", +# "of", +# "for", +# "in", +# "at", +# "to", +# "and", +# "der", +# "die", +# "das", +# "ein", +# "eine", +# "eines", +# "des", +# "auf", +# "und", +# "für", +# "vor", +# ] "##; /// Main struct of the config file. 
Contains substructs/headings in toml @@ -171,6 +231,9 @@ pub struct Colors { pub struct CitekeyFormatter { pub fields: Option>, pub case: Option, + pub ascii_only: bool, + pub ignored_chars: Option>, + pub ignored_words: Option>, } impl Default for BibiConfig { @@ -194,6 +257,9 @@ impl Default for BibiConfig { citekey_formatter: CitekeyFormatter { fields: None, case: None, + ascii_only: true, + ignored_chars: None, + ignored_words: None, }, } } @@ -224,6 +290,9 @@ impl BibiConfig { citekey_formatter: CitekeyFormatter { fields: None, case: None, + ascii_only: true, + ignored_chars: None, + ignored_words: None, }, } } diff --git a/tests/biblatex-test-citekeys.bib b/tests/biblatex-test-citekeys.bib new file mode 100644 index 0000000..9767f97 --- /dev/null +++ b/tests/biblatex-test-citekeys.bib @@ -0,0 +1,476 @@ +@set{set, + entryset = {article:herrmann-ofele_carboc=carben=as_2006,article:aksin-turkmen_effect=immobi=on_2006,article:yoon-ryu_pallad=pincer=comple_2006}, + annotation = {A \texttt{set} with three members.}, +} + +@set{set, + entryset = {article:glashow_partia=symmet=weak_1961,article:weinberg_model=lepton_1967,salam}, + annotation = {A \texttt{set} with three members discussing the standard + model of particle physics.}, +} + +@collection{collection:matuz-miller_contem=litera=critic_1990gale, + title = {Contemporary Literary Criticism}, + year = {1990}, + location = {Detroit}, + publisher = {Gale}, + volume = {61}, + pages = {204--208}, + editor = {Matuz, Roger and Miller, Helen}, + keywords = {narration}, + langid = {english}, + langidopts = {variant=american}, + annotation = {A \texttt{collection} entry providing the excerpt information + for the \texttt{article:doody_heming=style=jakes_1974} entry. 
Note the format of the \texttt{ + pages} field}, +} + +@article{article:aksin-turkmen_effect=immobi=on_2006, + title = {Effect of immobilization on catalytic characteristics of saturated + {Pd-N}-heterocyclic carbenes in {Mizoroki-Heck} reactions}, + author = {Aks{\i}n, {\"O}zge and T{\"u}rkmen, Hayati and Artok , Levent and + { \c{C}}etinkaya, Bekir and Ni, Chaoying and B{\" u}y{ \"u}kg{\"u} + ng{ \" o}r, Orhan and {\"O}zkal, Erhan}, + volume = {691}, + number = {13}, + pages = {3027--3036}, + journaltitle = jomch, + date = {2006}, + indextitle = {Effect of immobilization on catalytic characteristics}, +} + +@article{article:angenendt_honore=salvat=vom_2002, + title = {In Honore Salvatoris~-- Vom Sinn und Unsinn der Patrozinienkunde}, + shorttitle = {In Honore Salvatoris}, + author = {Angenendt, Arnold}, + volume = {97}, + pages = {431--456, 791--823}, + journaltitle = {Revue d'Histoire Eccl{\'e}siastique}, + date = {2002}, + langid = {german}, + indextitle = {In Honore Salvatoris}, + annotation = {A German article in a French journal. Apart from that, a + typical \texttt{article} entry. Note the \texttt{indextitle} + field}, +} + +@book{book:aristotle_de=anima_1907cambr#unive#press, + title = {De Anima}, + author = {Aristotle}, + location = {Cambridge}, + publisher = cup, + date = {1907}, + editor = {Hicks, Robert Drew}, + keywords = {primary, ancient, philosophy, athens}, + langid = {english}, + langidopts = {variant=british}, + annotation = {A \texttt{book} entry with an \texttt{author} and an \texttt{ + editor}}, +} + +@book{book:aristotle_physic_1929g#p#putna, + title = {Physics}, + shorttitle = {Physics}, + author = {Aristotle}, + location = {New York}, + publisher = {G. P. Putnam}, + url = {https://www.infobooks.org/authors/classic/aristotle-books/#Physic}, + date = {1929}, + translator = {Wicksteed, P. H. and Cornford, F. 
M.}, + keywords = {primary, ancient, philosophy}, + langid = {english}, + langidopts = {variant=american}, + file = {~/Documents/coding/projects/bibiman/tests/book:aristotle_physic_1929g#p#putna.pdf}, + annotation = {A \texttt{book} entry with a \texttt{translator} field}, + abstract = {The Physics is a work by Aristotle dedicated to the study of + nature. Regarded by Heidegger as "the fundamental work of Western + philosophy", it presents the renowned distinction between the + four types of cause, as well as reflections on chance, motion, + infinity, and other fundamental concepts. It is here that + Aristotle sets out his celebrated paradox of time.}, +} + +@book{book:aristotle_poetic_1968clare#press, + title = {Poetics}, + shorttitle = {Poetics}, + author = {Aristotle}, + location = {Oxford}, + publisher = {Clarendon Press}, + series = {Clarendon {Aristotle}}, + date = {1968}, + editor = {Lucas, D. W.}, + keywords = {primary}, + langid = {english}, + langidopts = {variant=british}, + annotation = {A \texttt{book} entry with an \texttt{author} and an \texttt{ + editor} as well as a \texttt{series} field}, +} + +@mvbook{mvbook:aristotle_rhetor=aristo=with_1877cambr#unive#press, + title = {The \textbf{Rhetoric} of {Aristotle} with a commentary by the late {Edward + Meredith Cope}}, + shorttitle = {Rhetoric}, + author = {Aristotle}, + publisher = cup, + date = {1877}, + editor = {Cope, Edward Meredith}, + commentator = {Cope, Edward Meredith}, + volumes = {3}, + keywords = {primary}, + langid = {english}, + langidopts = {variant=british}, + sorttitle = {Rhetoric of Aristotle}, + indextitle = {Rhetoric of {Aristotle}, The}, + annotation = {A commented edition. 
Note the concatenation of the \texttt{ + editor} and \texttt{commentator} fields as well as the \texttt{ + volumes}, \texttt{sorttitle}, and \texttt{indextitle} fields}, +} + +@book{book:augustine_hetero=cataly=synthe_1995marce#dekke, + title = {Heterogeneous catalysis for the synthetic \textit{chemist}}, + shorttitle = {Heterogeneous catalysis}, + author = {Augustine, Robert L.}, + location = {New York}, + publisher = {Marcel Dekker}, + date = {1995}, + langid = {english}, + langidopts = {variant=american}, + annotation = {A plain \texttt{book} entry}, + keywords = {chemistry}, +} + +@book{book:averroes_epistl=on=possib_1982jewis#theol#semin#ameri, + title = {The Epistle on the Possibility of Conjunction with the Active + Intellect by {Ibn Rushd} with the Commentary of {Moses Narboni}}, + shorttitle = {Possibility of Conjunction}, + author = {Averroes}, + location = {New York}, + publisher = {Jewish Theological Seminary of America}, + series = {Moreshet: Studies in {Jewish} History, Literature and Thought}, + number = {7}, + date = {1982}, + editor = {Bland, Kalman P.}, + translator = {Bland, Kalman P.}, + keywords = {primary}, + langid = {english}, + langidopts = {variant=american}, + indextitle = {Epistle on the Possibility of Conjunction, The}, + annotation = {A \texttt{book} entry with a \texttt{series} and a \texttt{ + number}. Note the concatenation of the \texttt{editor} and + \texttt{translator} fields as well as the \texttt{indextitle} + field}, +} + +@article{article:baez-lauda_higher=algebr=v_2004, + title = {Higher-Dimensional Algebra {V}: 2-Groups}, + author = {Baez, John C. and Lauda, Aaron D.}, + volume = {12}, + pages = {423--491}, + journaltitle = {Theory and Applications of Categories}, + date = {2004}, + version = {3}, + eprint = {math/0307200v3}, + eprinttype = {arxiv}, + langid = {english}, + keywords = {math}, + langidopts = {variant=american}, + annotation = {An \texttt{article} with \texttt{eprint} and \texttt{ + eprinttype} fields. 
Note that the arXiv reference is + transformed into a clickable link if \texttt{hyperref} support + has been enabled. Compare \texttt{baez\slash online}, which is + the same item given as an \texttt{online} entry}, +} + +@article{article:bertram-wentworth_gromov=invari=holomo_1996, + title = {Gromov invariants for holomorphic maps on {Riemann} surfaces}, + shorttitle = {Gromov invariants}, + author = {Bertram, Aaron and Wentworth, Richard}, + volume = {9}, + number = {2}, + pages = {529--571}, + journaltitle = jams, + date = {1996}, + langid = {english}, + langidopts = {variant=american}, + annotation = {An \texttt{article} entry with a \texttt{volume} and a \texttt + {number} field}, +} + +@article{article:doody_heming=style=jakes_1974, + title = {Hemingway's Style and {Jake's} Narration}, + author = {Doody, Terrence}, + year = {1974}, + journal = {The Journal of Narrative Technique}, + volume = {4}, + number = {3}, + pages = {212--225}, + langid = {english}, + langidopts = {variant=american}, + related = {matuz:article:doody_heming=style=jakes_1974}, + relatedstring = {\autocap{e}xcerpt in}, + annotation = {An \texttt{article} entry cited as an excerpt from a \texttt{ + collection} entry. Note the format of the \texttt{related} and + \texttt{relatedstring} fields}, +} + +@article{article:gillies_herder=prepar=goethe_1933, + title = {Herder and the Preparation of {Goethe's} Idea of World Literature}, + author = {Gillies, Alexander}, + series = {newseries}, + volume = {9}, + pages = {46--67}, + journaltitle = {Publications of the English Goethe Society}, + date = {1933}, + langid = {english}, + langidopts = {variant=british}, + annotation = {An \texttt{article} entry with a \texttt{series} and a \texttt + {volume} field. 
Note that format of the \texttt{series} field + in the database file}, +} + +@article{article:glashow_partia=symmet=weak_1961, + title = {Partial Symmetries of Weak Interactions}, + author = {Glashow, Sheldon}, + volume = {22}, + pages = {579--588}, + journaltitle = {Nucl.~Phys.}, + date = {1961}, +} + +@article{article:herrmann-ofele_carboc=carben=as_2006, + title = {A carbocyclic carbene as an efficient catalyst ligand for {C--C} + coupling reactions}, + author = {Herrmann, Wolfgang A. and {\"O}fele, Karl and Schneider, Sabine K. + and Herdtweck, Eberhardt and Hoffmann, Stephan D.}, + volume = {45}, + number = {23}, + pages = {3859--3862}, + journaltitle = anch-ie, + date = {2006}, + indextitle = {Carbocyclic carbene as an efficient catalyst, A}, +} + +@article{article:hostetler-wingate_alkane=gold=cluste_1998, + title = {Alkanethiolate gold cluster molecules with core diameters from 1.5 + to 5.2~{nm}}, + shorttitle = {Alkanethiolate gold cluster molecules}, + author = {Hostetler, Michael J. and Wingate, Julia E. and Zhong, Chuan-Jian + and Harris, Jay E. and Vachet, Richard W. and Clark, Michael R. and + Londono, J. David and Green, Stephen J. and Stokes, Jennifer J. and + Wignall, George D. and Glish, Gary L. and Porter, Marc D. and Evans + , Neal D. and Murray, Royce W.}, + volume = {14}, + number = {1}, + pages = {17--30}, + journaltitle = {Langmuir}, + date = {1998}, + subtitle = {Core and monolayer properties as a function of core size}, + langid = {english}, + langidopts = {variant=american}, + indextitle = {Alkanethiolate gold cluster molecules}, + annotation = {An \texttt{article} entry with \arabic{author} authors. By + default, long author and editor lists are automatically + truncated. This is configurable}, +} + +@article{article:kastenholz-hunenberger_comput=method=ionic_2006, + title = {Computation of methodology\hyphen independent ionic solvation free + energies from molecular simulations}, + author = {Kastenholz, M. A. 
and H{\"u}nenberger, Philippe H.}, + volume = {124}, + doi = {10.1063/1.2172593}, + journaltitle = jchph, + date = {2006}, + subtitle = {{I}. {The} electrostatic potential in molecular liquids}, + eid = {124106}, + langid = {english}, + langidopts = {variant=american}, + indextitle = {Computation of ionic solvation free energies}, + annotation = {An \texttt{article} entry with an \texttt{eid} and a \texttt{ + doi} field. Note that the \textsc{doi} is transformed into a + clickable link if \texttt{hyperref} support has been enabled}, + abstract = {The computation of \texttt{ionic} solvation free energies from atomistic + simulations is a surprisingly difficult problem that has found no + satisfactory solution for more than 15 years. The reason is that + the charging free energies evaluated from such simulations are + affected by very large errors. One of these is related to the + choice of a specific convention for summing up the contributions + of solvent charges to the electrostatic potential in the ionic + cavity, namely, on the basis of point charges within entire + solvent molecules (M scheme) or on the basis of individual point + charges (P scheme). The use of an inappropriate convention may + lead to a charge-independent offset in the calculated potential, + which depends on the details of the summation scheme, on the + quadrupole-moment trace of the solvent molecule, and on the + approximate form used to represent electrostatic interactions in + the system. However, whether the M or P scheme (if any) + represents the appropriate convention is still a matter of + on-going debate. The goal of the present article is to settle + this long-standing controversy by carefully analyzing (both + analytically and numerically) the properties of the electrostatic + potential in molecular liquids (and inside cavities within them). 
+ }, +} + +@article{article:sarfraz-razzak_techni=sectio=algori_2002, + title = {Technical section: {An} algorithm for automatic capturing of the + font outlines}, + author = {M. Sarfraz and M. F. A. Razzak}, + year = {2002}, + journal = {Computers and Graphics}, + volume = {26}, + number = {5}, + pages = {795--804}, + issn = {0097-8493}, + annotation = {An \texttt{article} entry with an \texttt{issn} field}, +} + +@article{article:reese_georgi=anglos=diplom_1958, + title = {Georgia in {Anglo-Spanish} Diplomacy, 1736--1739}, + author = {Reese, Trevor R.}, + series = {3}, + volume = {15}, + pages = {168--190}, + journaltitle = {William and Mary Quarterly}, + date = {1958}, + langid = {english}, + langidopts = {variant=american}, + annotation = {An \texttt{article} entry with a \texttt{series} and a \texttt + {volume} field. Note the format of the series. If the value of + the \texttt{series} field is an integer, this number is printed + as an ordinal and the string \enquote*{series} is appended + automatically}, +} + +@article{article:shore_twiceb=once=concei_1991, + title = {Twice-Born, Once Conceived}, + author = {Shore, Bradd}, + series = {newseries}, + volume = {93}, + number = {1}, + pages = {9--27}, + journaltitle = {American Anthropologist}, + date = {1991-03}, + subtitle = {Meaning Construction and Cultural Cognition}, + annotation = {An \texttt{article} entry with \texttt{series}, \texttt{volume + }, and \texttt{number} fields. 
Note the format of the \texttt{ + series} which is a localization key}, +} + +@article{article:sigfridsson-ryde_compar=method=derivi_1998, + title = {Comparison of methods for deriving atomic charges from the + electrostatic potential and moments}, + author = {Sigfridsson, Emma and Ryde, Ulf}, + volume = {19}, + number = {4}, + pages = {377--395}, + doi = {10.1002/(SICI)1096-987X(199803)19:4<377::AID-JCC1>3.0.CO;2-P}, + journaltitle = {Journal of Computational Chemistry}, + date = {1998}, + langid = {english}, + langidopts = {variant=american}, + indextitle = {Methods for deriving atomic charges}, + annotation = {An \texttt{article} entry with \texttt{volume}, \texttt{number + }, and \texttt{doi} fields. Note that the \textsc{doi} is + transformed into a clickable link if \texttt{hyperref} support + has been enabled}, + abstract = {Four methods for deriving partial atomic charges from the + quantum chemical electrostatic potential (CHELP, CHELPG, + Merz-Kollman, and RESP) have been compared and critically + evaluated. It is shown that charges strongly depend on how and + where the potential points are selected. Two alternative methods + are suggested to avoid the arbitrariness in the point-selection + schemes and van der Waals exclusion radii: CHELP-BOW, which also + estimates the charges from the electrostatic potential, but with + potential points that are Boltzmann-weighted after their + occurrence in actual simulations using the energy function of the + program in which the charges will be used, and CHELMO, which + estimates the charges directly from the electrostatic multipole + moments. 
Different criteria for the quality of the charges are + discussed.}, +} + +@article{article:spiegelberg_intent=intent=schola_1969, + title = {\mkbibquote{Intention} und \mkbibquote{Intentionalit{\"a}t} in der + Scholastik, bei Brentano und Husserl}, + shorttitle = {Intention und Intentionalit{\"a}t}, + author = {Spiegelberg, Herbert}, + volume = {29}, + pages = {189--216}, + journaltitle = {Studia Philosophica}, + date = {1969}, + langid = {german}, + sorttitle = {Intention und Intentionalitat in der Scholastik, bei Brentano + und Husserl}, + indexsorttitle = {Intention und Intentionalitat in der Scholastik, bei + Brentano und Husserl}, + annotation = {An \texttt{article} entry. Note the \texttt{sorttitle} and + \texttt{indexsorttitle} fields and the markup of the quotes in + the database file}, +} + +@article{article:springer_mediae=pilgri=routes_1950, + title = {Mediaeval Pilgrim Routes from {Scandinavia} to {Rome}}, + shorttitle = {Mediaeval Pilgrim Routes}, + author = {Springer, Otto}, + volume = {12}, + pages = {92--122}, + journaltitle = {Mediaeval Studies}, + date = {1950}, + langid = {english}, + langidopts = {variant=british}, + annotation = {A plain \texttt{article} entry}, +} + +@article{article:weinberg_model=lepton_1967, + title = {A Model of Leptons}, + author = {Weinberg, Steven}, + volume = {19}, + pages = {1264--1266}, + journaltitle = {Phys.~Rev.~Lett.}, + date = {1967}, +} + +@string{anch-ie = {Angew.~Chem. Int.~Ed.}} + +@string{cup = {Cambridge University Press}} + +@string{dtv = {Deutscher Taschenbuch-Verlag}} + +@string{hup = {Harvard University Press}} + +@string{jams = {J.~Amer. Math. Soc.}} + +@string{jchph = {J.~Chem. Phys.}} + +@string{jomch = {J.~Organomet. 
Chem.}} + +@string{pup = {Princeton University Press}} + +@incollection{incollection:westfahl_true=fronti, + title = {The True Frontier}, + author = {Westfahl, Gary}, + pages = {55--65}, + subtitle = {Confronting and Avoiding the Realities of Space in {American} + Science Fiction Films}, + crossref = {westfahl:frontier}, + langid = {english}, + langidopts = {variant=american}, + indextitle = {True Frontier, The}, + annotation = {A cross-referenced article from a \texttt{collection}. This is + an \texttt{incollection} entry with a \texttt{crossref} field. + Note the \texttt{subtitle} and \texttt{indextitle} fields}, +} + +@article{article:yoon-ryu_pallad=pincer=comple_2006, + title = {Palladium pincer complexes with reduced bond angle strain: + efficient catalysts for the {Heck} reaction}, + author = {Yoon, Myeong S. and Ryu, Dowook and Kim, Jeongryul and Ahn, Kyo + Han}, + volume = {25}, + number = {10}, + pages = {2409--2411}, + journaltitle = {Organometallics}, + date = {2006}, + indextitle = {Palladium pincer complexes}, +} diff --git a/tests/test-config.toml b/tests/test-config.toml index d3e42c5..8dd8014 100644 --- a/tests/test-config.toml +++ b/tests/test-config.toml @@ -61,10 +61,13 @@ custom_column = "series" # year_color = "135" [citekey_formatter] -fields = ["entrytype;;;;:", "author;2;;-;_", "title;3;6;_;_", "year", "publisher;;5;#;" ] +fields = ["shorthand;;;;+","entrytype;;;;:", "author;2;;-;_", "title;3;6;=;_", "year", "publisher;;5;#;" ] # fields = [ # CamelCase test # "author;2;;;", # "title;5;5;;", # "year" # ] case = "lowercase" +ascii_only = true +# ignored_words = ["the"] +# ignored_chars = ["?", "."] -- cgit v1.2.3 From c62b83e02359c24973344699116acc12b4a04108 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Tue, 14 Oct 2025 08:54:35 +0200 Subject: skip set and xdata entries by default --- src/bibiman/citekeys.rs | 7 +++++-- src/bibiman/citekeys/citekey_utils.rs | 5 +++++ src/config.rs | 10 ++++++++++ tests/biblatex-test-citekeys.bib | 4 ++-- 4 
files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 0cec28e..999c6cb 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -28,7 +28,7 @@ use owo_colors::OwoColorize; use serde::{Deserialize, Serialize}; use crate::{ - bibiman::citekeys::citekey_utils::{build_citekey, formatting_help}, + bibiman::citekeys::citekey_utils::{SKIPPED_ENTRIES, build_citekey, formatting_help}, config::{BibiConfig, IGNORED_SPECIAL_CHARS, IGNORED_WORDS}, }; @@ -167,6 +167,10 @@ impl CitekeyFormatting { pub fn do_formatting(&mut self, ignored_chars: &[char], ignored_words: &[String]) -> &mut Self { let mut old_new_keys: Vec<(String, String)> = Vec::new(); for entry in self.bib_entries.iter() { + // Skip specific entries + if SKIPPED_ENTRIES.contains(&entry.entry_type.to_string().to_lowercase().as_str()) { + continue; + } old_new_keys.push(( entry.key.clone(), build_citekey( @@ -181,7 +185,6 @@ impl CitekeyFormatting { } self.old_new_keys_map = old_new_keys; - // dbg!(&self.old_new_keys_map); self } diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs index 5f70224..58a8274 100644 --- a/src/bibiman/citekeys/citekey_utils.rs +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -15,6 +15,8 @@ // along with this program. If not, see . 
///// +use std::sync::LazyLock; + use biblatex::{ChunksExt, Entry, Type}; use deunicode::deunicode; use indoc::formatdoc; @@ -25,6 +27,8 @@ use owo_colors::{ use crate::bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully}; +pub(super) const SKIPPED_ENTRIES: [&str; 2] = ["set", "xdata"]; + pub(super) fn formatting_help() { let help = vec![ formatdoc!( @@ -200,6 +204,7 @@ pub(super) fn build_citekey( match case { Some(CitekeyCase::Lower) => new_citekey.to_lowercase(), Some(CitekeyCase::Upper) => new_citekey.to_uppercase(), + // otherwise skip, since camelcase is processed in char loop _ => new_citekey, } } diff --git a/src/config.rs b/src/config.rs index 7c1a0f8..b8d8b45 100644 --- a/src/config.rs +++ b/src/config.rs @@ -52,6 +52,10 @@ pub static IGNORED_WORDS: LazyLock> = LazyLock::new(|| { String::from("at"), String::from("to"), String::from("and"), + String::from("him"), + String::from("her"), + String::from("his"), + String::from("hers"), String::from("der"), String::from("die"), String::from("das"), @@ -63,6 +67,12 @@ pub static IGNORED_WORDS: LazyLock> = LazyLock::new(|| { String::from("und"), String::from("für"), String::from("vor"), + String::from("er"), + String::from("sie"), + String::from("es"), + String::from("ihm"), + String::from("ihr"), + String::from("ihnen"), ] }); diff --git a/tests/biblatex-test-citekeys.bib b/tests/biblatex-test-citekeys.bib index 9767f97..34c2f33 100644 --- a/tests/biblatex-test-citekeys.bib +++ b/tests/biblatex-test-citekeys.bib @@ -1,9 +1,9 @@ -@set{set, +@set{SET, entryset = {article:herrmann-ofele_carboc=carben=as_2006,article:aksin-turkmen_effect=immobi=on_2006,article:yoon-ryu_pallad=pincer=comple_2006}, annotation = {A \texttt{set} with three members.}, } -@set{set, +@set{stdmodel, entryset = {article:glashow_partia=symmet=weak_1961,article:weinberg_model=lepton_1967,salam}, annotation = {A \texttt{set} with three members discussing the standard model of particle physics.}, -- cgit v1.2.3 From 
18fa9b8bcb02aa5653b976cad7ec9c3123d4f372 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Tue, 14 Oct 2025 10:11:52 +0200 Subject: add dry-run example to citekeys doc --- CITEKEYS.md | 10 ++++++---- tests/test-config.toml | 9 ++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/CITEKEYS.md b/CITEKEYS.md index 912326a..165f58c 100644 --- a/CITEKEYS.md +++ b/CITEKEYS.md @@ -66,7 +66,8 @@ Every field pattern consists of the following five parts separated by semicolons. The general pattern looks like this (every subfield is explained below): -*biblatex field name* **;** *max word count* **;** *max char count* **;** *inner delimiter* **;** *trailing delimiter* +*biblatex field name* **;** *max word count* **;** *max char count* **;** *inner +delimiter* **;** *trailing delimiter* - **BibLaTeX field**: the first part represents the field name which value should be used to generate the content part of the citekey. Theoretically, any @@ -145,9 +146,10 @@ should be built. - Most importantly: *always use the **`--dry-run`** option first*! This will print a list of old and new values for all citekeys in the file without - changing anything. + changing anything. For the test file of this repo and using the pattern from + the [section below](#examples) `----dry-run` produces the following output: - After finding a good overall pattern, *use the `--output=` option* to create a - new file and don't overwrite your existent file. Thus, your original file + new file and don't overwrite your existing file. Thus, your original file isn't broken if the key formatter produces some unwanted output. - Even very long patterns are possible, they are not encouraged, since it bloats the bibfiles. @@ -155,7 +157,7 @@ should be built. it bares the risk of producing doublettes (e.g. single author and year only). But the citekey generator will not check for doublettes! - It is possible to keep special chars and use them as delimiters. 
But this - might cause problems other programs and CLI tools in particular, since many + might cause problems for other programs and CLI tools in particular, since many special chars are reserved for shell operations. For instance, it will very likely break the note file feature of `bibiman` which doesn't accept many special chars. diff --git a/tests/test-config.toml b/tests/test-config.toml index 8dd8014..704d8d8 100644 --- a/tests/test-config.toml +++ b/tests/test-config.toml @@ -61,7 +61,14 @@ custom_column = "series" # year_color = "135" [citekey_formatter] -fields = ["shorthand;;;;+","entrytype;;;;:", "author;2;;-;_", "title;3;6;=;_", "year", "publisher;;5;#;" ] +fields = [ + "shorthand;;;;+", + "entrytype;;;;:", + "author;2;;-;_", + "title;3;6;=;_", + "year", + "publisher;;5;#;" +] # fields = [ # CamelCase test # "author;2;;;", # "title;5;5;;", -- cgit v1.2.3 From 549f89c554ae70af28a9c7276673f0f77b488165 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Tue, 14 Oct 2025 10:21:39 +0200 Subject: add personal experience to citekey docs --- CITEKEYS.md | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/CITEKEYS.md b/CITEKEYS.md index 165f58c..19bd497 100644 --- a/CITEKEYS.md +++ b/CITEKEYS.md @@ -148,6 +148,7 @@ should be built. print a list of old and new values for all citekeys in the file without changing anything. For the test file of this repo and using the pattern from the [section below](#examples) `----dry-run` produces the following output: + [![niri-screenshot-2025-10-14-10-11-06.png](https://i.postimg.cc/SxxRkY8K/niri-screenshot-2025-10-14-10-11-06.png)](https://postimg.cc/bs4pRJmX) - After finding a good overall pattern, *use the `--output=` option* to create a new file and don't overwrite your existing file. Thus, your original file isn't broken if the key formatter produces some unwanted output. @@ -157,9 +158,9 @@ should be built. it bares the risk of producing doublettes (e.g. single author and year only). 
But the citekey generator will not check for doublettes! - It is possible to keep special chars and use them as delimiters. But this - might cause problems for other programs and CLI tools in particular, since many - special chars are reserved for shell operations. For instance, it will very - likely break the note file feature of `bibiman` which doesn't accept many + might cause problems for other programs and CLI tools in particular, since + many special chars are reserved for shell operations. For instance, it will + very likely break the note file feature of `bibiman` which doesn't accept many special chars. ## Examples @@ -215,3 +216,25 @@ The combination of those setting will produce the following citekeys: - **`article:bos-mccurley_lat=met=pub=wor_2023`** - **`book:bhambra-holmwood_col=mod=soc=the_camb:medf_2021`** + +**Personal Note** + +I use the following pattern to format the citekeys of my bibfiles: + +```toml +[citekey_formatter] +fields = [ + "author;1;;;_", + "title;3;7;-;_", + "year;;;;_", + "entrytype;;;;_", + "shorthand", +] +case = "lowercase" +ascii_only = true +``` + +It produces citekeys with enough information to quickly identify the underlying +work while not being too long; at least in my opinion. The shorthand at the end +is only printed in a few cases, but shows me that the specific work might differ +from standard articles/books etc. 
-- cgit v1.2.3 From 9b21727bd151a3fda2133b9da12eec588068130e Mon Sep 17 00:00:00 2001 From: lukeflo Date: Tue, 14 Oct 2025 14:30:56 +0200 Subject: use citekey formatter for adding new entries via doi --- src/bibiman.rs | 105 +++++++++++++----------------------------- src/bibiman/citekeys.rs | 118 ++++++++++++++++++++++++++---------------------- 2 files changed, 97 insertions(+), 126 deletions(-) diff --git a/src/bibiman.rs b/src/bibiman.rs index 3158d73..392ae95 100644 --- a/src/bibiman.rs +++ b/src/bibiman.rs @@ -16,22 +16,23 @@ ///// use crate::app::expand_home; +use crate::bibiman::citekeys::CitekeyFormatting; use crate::bibiman::entries::EntryTableColumn; use crate::bibiman::{bibisetup::*, search::BibiSearch}; use crate::cliargs::CLIArgs; use crate::config::BibiConfig; -use crate::tui::popup::{PopupArea, PopupItem, PopupKind}; use crate::tui::Tui; +use crate::tui::popup::{PopupArea, PopupItem, PopupKind}; use crate::{app, cliargs}; use crate::{bibiman::entries::EntryTable, bibiman::keywords::TagList}; use arboard::Clipboard; -use color_eyre::eyre::{Context, Error, Result}; +use biblatex::Bibliography; +use color_eyre::eyre::{Context, Error, Result, eyre}; use crossterm::event::KeyCode; use editor_command::EditorBuilder; use ratatui::widgets::ScrollbarState; -use regex::Regex; use std::ffi::OsStr; -use std::fs::{self, read_to_string}; +use std::fs::{self}; use std::fs::{File, OpenOptions}; use std::io::Write; use std::path::PathBuf; @@ -190,7 +191,9 @@ impl Bibiman { self.popup_area.popup_message = message.unwrap().to_owned(); Ok(()) } else { - Err(Error::msg("You need to past at least a message via Some(&str) to create a message popup")) + Err(Error::msg( + "You need to past at least a message via Some(&str) to create a message popup", + )) } } PopupKind::MessageError => { @@ -202,7 +205,9 @@ impl Bibiman { self.popup_area.popup_message = message.unwrap().to_owned(); Ok(()) } else { - Err(Error::msg("You need to past at least a message via Some(&str) to create 
a message popup")) + Err(Error::msg( + "You need to past at least a message via Some(&str) to create a message popup", + )) } } PopupKind::OpenRes => { @@ -680,23 +685,32 @@ impl Bibiman { // Index of selected popup field let popup_idx = self.popup_area.popup_state.selected().unwrap(); - // regex pattern to match citekey in fetched bibtexstring - let pattern = Regex::new(r"\{([^\{\},]*),").unwrap(); + let new_bib_entry = Bibliography::parse(&self.popup_area.popup_sel_item) + .map_err(|e| eyre!("Couldn't parse downloaded bib entry: {}", e.to_string()))?; - let citekey = pattern - .captures(&self.popup_area.popup_sel_item) - .unwrap() - .get(1) - .unwrap() - .as_str() - .to_string(); + let formatted_struct = + if let Some(formatter) = CitekeyFormatting::new(cfg, new_bib_entry.clone()) { + Some(formatter.do_formatting()) + } else { + None + }; + + let (new_citekey, entry_string) = if let Some(mut formatter) = formatted_struct { + ( + formatter.get_citekey_pair(0).unwrap().1, + formatter.print_updated_bib_as_string(), + ) + } else { + let keys = new_bib_entry.keys().collect::>(); + (keys[0].to_string(), new_bib_entry.to_biblatex_string()) + }; // Check if new file or existing file was choosen let mut file = if self.popup_area.popup_list[popup_idx] .0 .contains("Create new file") { - let citekey = PathBuf::from(&citekey); + let citekey = PathBuf::from(&new_citekey); // Get path of current files let path: PathBuf = if self.main_bibfiles[0].is_file() { self.main_bibfiles[0].parent().unwrap().to_owned() @@ -714,45 +728,18 @@ impl Bibiman { } else { let file_path = &self.main_bibfiles[popup_idx - 1]; - // Check if similar citekey already exists - let file_string = read_to_string(&file_path).unwrap(); - - // If choosen file contains entry with fetched citekey, append an - // char to the citekey so no dublettes are created - if file_string.contains(&citekey) { - let mut new_citekey = String::new(); - - // Loop over ASCII alpabetic chars and check again if citekey with - // 
appended char exists. If yes, move to next char and test again. - // If the citekey is free, use it and break the loop - for c in b'a'..=b'z' { - let append_char = (c as char).to_string(); - new_citekey = citekey.clone() + &append_char; - if !file_string.contains(&new_citekey) { - break; - } - } - - let new_entry_string_clone = self.popup_area.popup_sel_item.clone(); - - // Replace the double citekey with newly created - self.popup_area.popup_sel_item = pattern - .replace(&new_entry_string_clone, format!("{{{},", &new_citekey)) - .to_string(); - } - OpenOptions::new().append(true).open(file_path).unwrap() }; // Optionally, add a newline before the content file.write_all(b"\n")?; // Write content to file - file.write_all(self.popup_area.popup_sel_item.as_bytes())?; + file.write_all(entry_string.as_bytes())?; // Update the database and the lists to reflect the new content self.update_lists(cfg); self.close_popup(); // Select newly created entry - self.select_entry_by_citekey(&citekey); + self.select_entry_by_citekey(&new_citekey); Ok(()) } @@ -1285,38 +1272,10 @@ impl Bibiman { #[cfg(test)] mod tests { - use regex::Captures; - - use super::*; - #[test] fn citekey_pattern() { let citekey = format!("{{{},", "a_key_2001"); assert_eq!(citekey, "{a_key_2001,") } - - #[test] - fn regex_capture_citekey() { - let re = Regex::new(r"\{([^\{\},]*),").unwrap(); - - let bibstring = String::from("@article{citekey77_2001:!?, author = {Hanks, Tom}, title = {A great book}, year = {2001}}"); - - let citekey = re.captures(&bibstring).unwrap().get(1).unwrap().as_str(); - - assert_eq!(citekey, "citekey77_2001:!?"); - - if bibstring.contains(&citekey) { - let append_char = "a"; - let new_entry_string_clone = bibstring.clone(); - - let updated_bibstring = re - .replace(&new_entry_string_clone, |caps: &Captures| { - format!("{{{}{},", &caps[1], &append_char) - }) - .to_string(); - - assert_eq!(updated_bibstring, "@article{citekey77_2001:!?a, author = {Hanks, Tom}, title = {A great book}, 
year = {2001}}") - } - } } diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 999c6cb..4516b28 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -51,24 +51,27 @@ pub enum CitekeyCase { } #[derive(Debug, Default, Clone)] -pub(crate) struct CitekeyFormatting { +pub(crate) struct CitekeyFormatting<'a> { /// bibfile to replace keys at. The optional fields defines a differing /// output file to write to, otherwise original file will be overwritten. - bibfile_path: (PathBuf, Option), bib_entries: Bibliography, fields: Vec, case: Option, old_new_keys_map: Vec<(String, String)>, dry_run: bool, ascii_only: bool, + ignored_chars: &'a [char], + ignored_words: &'a [String], } -impl CitekeyFormatting { +impl<'a> CitekeyFormatting<'a> { pub(crate) fn parse_citekey_cli( parser: &mut lexopt::Parser, cfg: &BibiConfig, ) -> color_eyre::Result<()> { let mut formatter = CitekeyFormatting::default(); + let mut source_file = PathBuf::new(); + let mut target_file: Option = None; formatter.fields = cfg.citekey_formatter.fields.clone().ok_or_eyre(format!( "Need to define {} correctly in config file", @@ -93,78 +96,73 @@ impl CitekeyFormatting { } Short('d') | Long("dry-run") => formatter.dry_run = true, Short('s') | Short('f') | Long("source") | Long("file") => { - formatter.bibfile_path.0 = parser.value()?.into() + source_file = parser.value()?.into() } Short('t') | Short('o') | Long("target") | Long("output") => { - formatter.bibfile_path.1 = Some(parser.value()?.into()) + target_file = Some(parser.value()?.into()) } _ => return Err(arg.unexpected().into()), } } - let bibstring = std::fs::read_to_string(&formatter.bibfile_path.0)?; + let bibstring = std::fs::read_to_string(&source_file)?; formatter.bib_entries = Bibliography::parse(&bibstring) .map_err(|e| eyre!("Couldn't parse bibfile due to {}", e.kind))?; - let ignored_chars = if let Some(chars) = &cfg.citekey_formatter.ignored_chars { + formatter.ignored_chars = if let Some(chars) = 
&cfg.citekey_formatter.ignored_chars { chars.as_slice() } else { IGNORED_SPECIAL_CHARS.as_slice() }; - let ignored_words = if let Some(words) = &cfg.citekey_formatter.ignored_words { + formatter.ignored_words = if let Some(words) = &cfg.citekey_formatter.ignored_words { words.as_slice() } else { &*IGNORED_WORDS.as_slice() }; formatter - .do_formatting(ignored_chars, ignored_words) + .do_formatting() .rev_sort_new_keys_by_len() - .update_file()?; + .update_file(source_file, target_file)?; Ok(()) } /// Start Citekey formatting with building a new instance of `CitekeyFormatting` - /// Formatting is processed file by file, because `bibman` can handle - /// multi-file setups. - /// The `Bibliography` inserted will be edited in place with the new citekeys. - /// Thus, in the end the `bib_entries` field will hold the updated `Bibliography` - pub fn new>( - cfg: &BibiConfig, - path: P, - target: Option

, - bib_entries: Bibliography, - ) -> color_eyre::Result { - let fields = cfg - .citekey_formatter - .fields - .clone() - .expect("Need to define fields in config to format citekeys"); + pub fn new(cfg: &'a BibiConfig, bib_entries: Bibliography) -> Option { + let fields = cfg.citekey_formatter.fields.clone().unwrap_or(Vec::new()); if fields.is_empty() { - return Err(eyre!( - "To format all citekeys, you need to provide {} values in the config file", - "fields".bold() - )); + return None; } - Ok(Self { - bibfile_path: ( - path.as_ref().to_path_buf(), - target.map(|p| p.as_ref().to_path_buf()), - ), + let ignored_chars = if let Some(chars) = &cfg.citekey_formatter.ignored_chars { + chars.as_slice() + } else { + IGNORED_SPECIAL_CHARS.as_slice() + }; + + let ignored_words = if let Some(words) = &cfg.citekey_formatter.ignored_words { + words.as_slice() + } else { + &*IGNORED_WORDS.as_slice() + }; + + Some(Self { bib_entries, fields, case: cfg.citekey_formatter.case.clone(), old_new_keys_map: Vec::new(), dry_run: false, ascii_only: cfg.citekey_formatter.ascii_only, + ignored_chars, + ignored_words, }) } - /// Process the actual formatting. The citekey of every entry will be updated. - pub fn do_formatting(&mut self, ignored_chars: &[char], ignored_words: &[String]) -> &mut Self { + /// Process the actual formatting. Updated citekeys will be stored in a the + /// `self.old_new_keys_map` vector consisting of pairs (old key, new key). 
+ pub fn do_formatting(mut self) -> Self { let mut old_new_keys: Vec<(String, String)> = Vec::new(); for entry in self.bib_entries.iter() { // Skip specific entries @@ -178,8 +176,8 @@ impl CitekeyFormatting { &self.fields, self.case.as_ref(), self.ascii_only, - ignored_chars, - ignored_words, + self.ignored_chars, + self.ignored_words, ), )); } @@ -189,8 +187,12 @@ impl CitekeyFormatting { self } - /// Write entries with updated citekeys to bibfile - pub fn update_file(&mut self) -> color_eyre::Result<()> { + /// Write formatted citekeys to bibfile replacing the old keys in all fields + pub fn update_file>( + &mut self, + source_file: P, + target_file: Option

, + ) -> color_eyre::Result<()> { if self.dry_run { println!("Following citekeys would be formatted: old => new\n"); self.old_new_keys_map.sort_by(|a, b| a.0.cmp(&b.0)); @@ -198,11 +200,10 @@ impl CitekeyFormatting { println!("{} => {}", old.italic(), new.bold()) } } else { - let source_file = self.bibfile_path.0.as_path(); - let target_file = if let Some(path) = &self.bibfile_path.1 { - path + let target_file = if let Some(path) = target_file { + path.as_ref().to_path_buf() } else { - source_file + source_file.as_ref().to_path_buf() }; let mut content = std::fs::read_to_string(source_file)?; @@ -228,23 +229,34 @@ impl CitekeyFormatting { /// You are **very encouraged** to call this method before `update_file()` to /// prevent replacing citekeys partly which afterwards wont match the pattern /// anymore. - pub fn rev_sort_new_keys_by_len(&mut self) -> &mut Self { + pub fn rev_sort_new_keys_by_len(mut self) -> Self { self.old_new_keys_map .sort_by(|a, b| b.0.len().cmp(&a.0.len())); self } + + /// Update the `Bibliography` of the `CitekeyFormatting` struct and return + /// it as `String`. 
+ pub fn print_updated_bib_as_string(&mut self) -> String { + let mut content = self.bib_entries.to_biblatex_string(); + for (old_key, new_key) in self.old_new_keys_map.iter() { + content = content.replace(old_key, new_key); + } + content + } + + pub fn get_citekey_pair(&self, idx: usize) -> Option<(String, String)> { + self.old_new_keys_map.get(idx).map(|pair| pair.to_owned()) + } } #[cfg(test)] mod tests { - use std::path::PathBuf; - - use biblatex::Bibliography; - use crate::{ bibiman::citekeys::{CitekeyCase, CitekeyFormatting}, config::{IGNORED_SPECIAL_CHARS, IGNORED_WORDS}, }; + use biblatex::Bibliography; #[test] fn format_citekey_test() { @@ -270,8 +282,7 @@ mod tests { } "; let bibliography = Bibliography::parse(src).unwrap(); - let mut formatting_struct = CitekeyFormatting { - bibfile_path: (PathBuf::new(), None), + let formatting_struct = CitekeyFormatting { bib_entries: bibliography, fields: vec![ "entrytype;;;;:".into(), @@ -284,9 +295,10 @@ mod tests { old_new_keys_map: Vec::new(), dry_run: false, ascii_only: true, + ignored_chars: &IGNORED_SPECIAL_CHARS, + ignored_words: &IGNORED_WORDS, }; - let _ = formatting_struct - .do_formatting(IGNORED_SPECIAL_CHARS.as_slice(), &*IGNORED_WORDS.as_slice()); + let formatting_struct = formatting_struct.do_formatting(); assert_eq!( formatting_struct.old_new_keys_map.get(0).unwrap().1, "article:bos-mccurley_lat=met=pub=wor_2023" -- cgit v1.2.3 From 3cd41cb1bc2046f1710175999305ab08508bae69 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Tue, 14 Oct 2025 16:28:20 +0200 Subject: option to reformat citekey-based basenames of attachment files --- CITEKEYS.md | 8 +- src/bibiman/bibisetup.rs | 98 +++---- src/bibiman/citekeys.rs | 85 +++++- src/bibiman/citekeys/citekey_utils.rs | 17 +- tests/biblatex-test-citekeys.bib | 476 ---------------------------------- 5 files changed, 152 insertions(+), 532 deletions(-) delete mode 100644 tests/biblatex-test-citekeys.bib diff --git a/CITEKEYS.md b/CITEKEYS.md index 19bd497..828e557 
100644 --- a/CITEKEYS.md +++ b/CITEKEYS.md @@ -147,11 +147,17 @@ should be built. - Most importantly: *always use the **`--dry-run`** option first*! This will print a list of old and new values for all citekeys in the file without changing anything. For the test file of this repo and using the pattern from - the [section below](#examples) `----dry-run` produces the following output: + the [section below](#examples) `--dry-run` produces the following output: [![niri-screenshot-2025-10-14-10-11-06.png](https://i.postimg.cc/SxxRkY8K/niri-screenshot-2025-10-14-10-11-06.png)](https://postimg.cc/bs4pRJmX) - After finding a good overall pattern, *use the `--output=` option* to create a new file and don't overwrite your existing file. Thus, your original file isn't broken if the key formatter produces some unwanted output. +- It's possible to update citekey-based PDF and note files directly when + formatting the citekeys using the `-u`/`--update-attachments` option. Thus, + all PDFs and notes are already linked to the correct entries after updating + the citekeys. Since this operation can break things, use it with `--dry-run` + first. As with regular citekeys this will print all changes without processing + anything. - Even very long patterns are possible, they are not encouraged, since it bloats the bibfiles. 
- The same accounts for *too short* patterns; if the pattern is to unspecific, diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs index a83a507..a817236 100644 --- a/src/bibiman/bibisetup.rs +++ b/src/bibiman/bibisetup.rs @@ -22,6 +22,7 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::ffi::{OsStr, OsString}; +use std::path::Path; use std::{fs, path::PathBuf}; use walkdir::WalkDir; @@ -318,23 +319,27 @@ impl BibiSetup { cfg: &BibiConfig, ) -> Vec { let mut pdf_files = if cfg.general.pdf_path.is_some() { - collect_file_paths(cfg.general.pdf_path.as_ref().unwrap(), &Some(vec!["pdf"])) + collect_file_paths( + cfg.general.pdf_path.as_ref().unwrap(), + Some(vec!["pdf".into()].as_slice()), + ) + } else { + None + }; + let ext = if let Some(ext) = &cfg.general.note_extensions + && cfg.general.note_path.is_some() + { + // let mut ext: Vec<&str> = Vec::new(); + // for e in cfg.general.note_extensions.as_ref().unwrap().iter() { + // ext.push(e); + // } + Some(ext.as_slice()) } else { None }; - let ext: Option> = - if cfg.general.note_path.is_some() && cfg.general.note_extensions.is_some() { - let mut ext: Vec<&str> = Vec::new(); - for e in cfg.general.note_extensions.as_ref().unwrap().iter() { - ext.push(e); - } - Some(ext) - } else { - None - }; let mut note_files = if cfg.general.note_path.is_some() && cfg.general.note_extensions.is_some() { - collect_file_paths(cfg.general.note_path.as_ref().unwrap(), &ext) + collect_file_paths(cfg.general.note_path.as_ref().unwrap(), ext.clone()) } else { None }; @@ -369,7 +374,7 @@ impl BibiSetup { file_field: filepaths.1, subtitle: Self::get_subtitle(k, bibliography), notes: if note_files.is_some() { - Self::get_notepath(k, &mut note_files, &ext) + Self::get_notepath(k, &mut note_files, ext) } else { None }, @@ -575,18 +580,18 @@ impl BibiSetup { ) -> (Option>, bool) { if biblio.get(citekey).unwrap().file().is_ok() { ( - Some(vec![biblio - .get(citekey) - .unwrap() - 
.file() - .unwrap() - .trim() - .into()]), + Some(vec![ + biblio.get(citekey).unwrap().file().unwrap().trim().into(), + ]), true, ) } else if pdf_files.is_some() { ( - Self::merge_filepath_or_none_two(&citekey, pdf_files, vec!["pdf"]), + Self::merge_filepath_or_none_two( + &citekey, + pdf_files, + vec!["pdf".into()].as_slice(), + ), false, ) } else { @@ -597,10 +602,10 @@ impl BibiSetup { pub fn get_notepath( citekey: &str, note_files: &mut Option>>, - ext: &Option>, + ext: Option<&[String]>, ) -> Option> { if let Some(e) = ext { - Self::merge_filepath_or_none_two(citekey, note_files, e.to_vec()) + Self::merge_filepath_or_none_two(citekey, note_files, e) } else { None } @@ -627,7 +632,7 @@ impl BibiSetup { fn merge_filepath_or_none_two( citekey: &str, files: &mut Option>>, - extensions: Vec<&str>, + extensions: &[String], ) -> Option> { let mut file = Vec::new(); @@ -645,11 +650,7 @@ impl BibiSetup { } } - if file.is_empty() { - None - } else { - Some(file) - } + if file.is_empty() { None } else { Some(file) } } } @@ -663,15 +664,17 @@ impl BibiSetup { /// /// Passing [`None`] as argument for extensions will result in collecting all files /// from the given directory and its subdirectories! 
-pub fn collect_file_paths( - file_dir: &PathBuf, - extensions: &Option>, +pub fn collect_file_paths>( + file_dir: P, + extensions: Option<&[String]>, ) -> Option>> { let mut files: HashMap> = HashMap::new(); + let file_dir = file_dir.as_ref(); + // Expand tilde to /home/user let file_dir = if file_dir.starts_with("~") { - &app::expand_home(&file_dir) + &app::expand_home(&file_dir.to_path_buf()) } else { file_dir }; @@ -682,13 +685,13 @@ pub fn collect_file_paths( let f = file.unwrap().into_path(); if f.is_file() && f.extension().is_some() - && extensions.as_ref().is_some_and(|v| { + && extensions.is_some_and(|v| { v.contains( &f.extension() .unwrap_or_default() .to_ascii_lowercase() - .to_str() - .unwrap_or_default(), + .to_string_lossy() + .to_string(), ) }) { @@ -721,11 +724,7 @@ pub fn collect_file_paths( } } - if files.is_empty() { - None - } else { - Some(files) - } + if files.is_empty() { None } else { Some(files) } } #[cfg(test)] @@ -759,8 +758,11 @@ mod tests { ], ); - let matches = - BibiSetup::merge_filepath_or_none_two("citekey", &mut Some(files), vec!["md", "pdf"]); + let matches = BibiSetup::merge_filepath_or_none_two( + "citekey", + &mut Some(files), + vec!["md".into(), "pdf".into()].as_slice(), + ); assert_eq!( matches.clone().unwrap().iter().next().unwrap().to_owned(), @@ -770,9 +772,11 @@ mod tests { matches.clone().unwrap().last().unwrap().to_owned(), OsString::from("/one/other/citekey.pdf") ); - assert!(!matches - .clone() - .unwrap() - .contains(&OsString::from("/one/other/citekey2.pdf"))); + assert!( + !matches + .clone() + .unwrap() + .contains(&OsString::from("/one/other/citekey2.pdf")) + ); } } diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 4516b28..8f70ab0 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -16,6 +16,7 @@ ///// use std::{ + ffi::OsStr, fs::OpenOptions, io::Write, path::{Path, PathBuf}, @@ -28,7 +29,10 @@ use owo_colors::OwoColorize; use serde::{Deserialize, Serialize}; use crate::{ - 
bibiman::citekeys::citekey_utils::{SKIPPED_ENTRIES, build_citekey, formatting_help}, + bibiman::{ + bibisetup::collect_file_paths, + citekeys::citekey_utils::{SKIPPED_ENTRIES, build_citekey, formatting_help}, + }, config::{BibiConfig, IGNORED_SPECIAL_CHARS, IGNORED_WORDS}, }; @@ -72,6 +76,7 @@ impl<'a> CitekeyFormatting<'a> { let mut formatter = CitekeyFormatting::default(); let mut source_file = PathBuf::new(); let mut target_file: Option = None; + let mut update_files = false; formatter.fields = cfg.citekey_formatter.fields.clone().ok_or_eyre(format!( "Need to define {} correctly in config file", @@ -101,6 +106,7 @@ impl<'a> CitekeyFormatting<'a> { Short('t') | Short('o') | Long("target") | Long("output") => { target_file = Some(parser.value()?.into()) } + Short('u') | Long("update-attachments") => update_files = true, _ => return Err(arg.unexpected().into()), } } @@ -122,10 +128,13 @@ impl<'a> CitekeyFormatting<'a> { &*IGNORED_WORDS.as_slice() }; - formatter - .do_formatting() - .rev_sort_new_keys_by_len() - .update_file(source_file, target_file)?; + let mut updated_formatter = formatter.do_formatting().rev_sort_new_keys_by_len(); + + updated_formatter.update_file(source_file, target_file)?; + + if update_files { + updated_formatter.update_notes_pdfs(cfg)?; + } Ok(()) } @@ -194,7 +203,13 @@ impl<'a> CitekeyFormatting<'a> { target_file: Option

, ) -> color_eyre::Result<()> { if self.dry_run { - println!("Following citekeys would be formatted: old => new\n"); + println!( + "{}\n", + "Following citekeys would be formatted: old => new" + .bold() + .underline() + .white() + ); self.old_new_keys_map.sort_by(|a, b| a.0.cmp(&b.0)); for (old, new) in &self.old_new_keys_map { println!("{} => {}", old.italic(), new.bold()) @@ -235,6 +250,64 @@ impl<'a> CitekeyFormatting<'a> { self } + pub fn update_notes_pdfs(&self, cfg: &BibiConfig) -> color_eyre::Result<()> { + if let Some(pdf_path) = &cfg.general.pdf_path { + self.update_files_by_citekey_basename(pdf_path, vec!["pdf".into()].as_slice())?; + } + if let Some(note_path) = &cfg.general.note_path + && let Some(ext) = &cfg.general.note_extensions + { + self.update_files_by_citekey_basename(note_path, ext.as_slice())?; + } + Ok(()) + } + + fn update_files_by_citekey_basename>( + &self, + path: P, + ext: &[String], + ) -> color_eyre::Result<()> { + let files = collect_file_paths(path.as_ref(), Some(ext)); + if self.dry_run { + println!( + "\n{}\n", + "Following paths would be updated:" + .underline() + .bold() + .white() + ) + } + if let Some(mut f) = files { + for (old_key, new_key) in self.old_new_keys_map.iter() { + for e in ext { + let old_basename = old_key.to_owned() + "." + e; + if let Some(item) = f.get_mut(&old_basename) { + for p in item { + let ext = p.extension(); + let basename = new_key.to_owned() + + "." + + ext.unwrap_or(OsStr::new("")).to_str().unwrap_or(""); + let new_name = p + .parent() + .expect("parent expected") + .join(Path::new(&basename)); + if !self.dry_run { + std::fs::rename(p, new_name)?; + } else { + println!( + "{} => {}", + p.display().to_string().italic().dimmed(), + new_name.display().to_string().bold() + ) + } + } + } + } + } + } + Ok(()) + } + /// Update the `Bibliography` of the `CitekeyFormatting` struct and return /// it as `String`. 
pub fn print_updated_bib_as_string(&mut self) -> String { diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs index 58a8274..61a1804 100644 --- a/src/bibiman/citekeys/citekey_utils.rs +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -79,7 +79,7 @@ pub(super) fn formatting_help() { \t{} \tThe bibfile for which the citekey formatting should be processed. \tTakes a path as argument. - ", "-s, -f, --source=, --file=".fg::().bold()}, + ", "-s, -f, --source=, --file=".fg::().bold()}, formatdoc!( " \t{} @@ -88,9 +88,22 @@ pub(super) fn formatting_help() { \tcreated. \tIf the argument isn't used, the original file will be {}! ", - "-t, -o, --target=, --output=".fg::().bold(), + "-t, -o, --target=, --output=" + .fg::() + .bold(), "overwritten".italic(), ), + formatdoc!( + " + \t{} + \tWhen formatting citekeys also rename all PDFs and notefiles + \tfollowing the bibiman citekey-basename scheme at the locations + \tset in the config file. This option can break file paths. Try + \twith {} first! 
+ ", + "-u, --update-attachments".fg::().bold(), + "--dry-run".bold() + ), ]; let help = help.join("\n"); println!("{}", help); diff --git a/tests/biblatex-test-citekeys.bib b/tests/biblatex-test-citekeys.bib deleted file mode 100644 index 34c2f33..0000000 --- a/tests/biblatex-test-citekeys.bib +++ /dev/null @@ -1,476 +0,0 @@ -@set{SET, - entryset = {article:herrmann-ofele_carboc=carben=as_2006,article:aksin-turkmen_effect=immobi=on_2006,article:yoon-ryu_pallad=pincer=comple_2006}, - annotation = {A \texttt{set} with three members.}, -} - -@set{stdmodel, - entryset = {article:glashow_partia=symmet=weak_1961,article:weinberg_model=lepton_1967,salam}, - annotation = {A \texttt{set} with three members discussing the standard - model of particle physics.}, -} - -@collection{collection:matuz-miller_contem=litera=critic_1990gale, - title = {Contemporary Literary Criticism}, - year = {1990}, - location = {Detroit}, - publisher = {Gale}, - volume = {61}, - pages = {204--208}, - editor = {Matuz, Roger and Miller, Helen}, - keywords = {narration}, - langid = {english}, - langidopts = {variant=american}, - annotation = {A \texttt{collection} entry providing the excerpt information - for the \texttt{article:doody_heming=style=jakes_1974} entry. 
Note the format of the \texttt{ - pages} field}, -} - -@article{article:aksin-turkmen_effect=immobi=on_2006, - title = {Effect of immobilization on catalytic characteristics of saturated - {Pd-N}-heterocyclic carbenes in {Mizoroki-Heck} reactions}, - author = {Aks{\i}n, {\"O}zge and T{\"u}rkmen, Hayati and Artok , Levent and - { \c{C}}etinkaya, Bekir and Ni, Chaoying and B{\" u}y{ \"u}kg{\"u} - ng{ \" o}r, Orhan and {\"O}zkal, Erhan}, - volume = {691}, - number = {13}, - pages = {3027--3036}, - journaltitle = jomch, - date = {2006}, - indextitle = {Effect of immobilization on catalytic characteristics}, -} - -@article{article:angenendt_honore=salvat=vom_2002, - title = {In Honore Salvatoris~-- Vom Sinn und Unsinn der Patrozinienkunde}, - shorttitle = {In Honore Salvatoris}, - author = {Angenendt, Arnold}, - volume = {97}, - pages = {431--456, 791--823}, - journaltitle = {Revue d'Histoire Eccl{\'e}siastique}, - date = {2002}, - langid = {german}, - indextitle = {In Honore Salvatoris}, - annotation = {A German article in a French journal. Apart from that, a - typical \texttt{article} entry. Note the \texttt{indextitle} - field}, -} - -@book{book:aristotle_de=anima_1907cambr#unive#press, - title = {De Anima}, - author = {Aristotle}, - location = {Cambridge}, - publisher = cup, - date = {1907}, - editor = {Hicks, Robert Drew}, - keywords = {primary, ancient, philosophy, athens}, - langid = {english}, - langidopts = {variant=british}, - annotation = {A \texttt{book} entry with an \texttt{author} and an \texttt{ - editor}}, -} - -@book{book:aristotle_physic_1929g#p#putna, - title = {Physics}, - shorttitle = {Physics}, - author = {Aristotle}, - location = {New York}, - publisher = {G. P. Putnam}, - url = {https://www.infobooks.org/authors/classic/aristotle-books/#Physic}, - date = {1929}, - translator = {Wicksteed, P. H. and Cornford, F. 
M.}, - keywords = {primary, ancient, philosophy}, - langid = {english}, - langidopts = {variant=american}, - file = {~/Documents/coding/projects/bibiman/tests/book:aristotle_physic_1929g#p#putna.pdf}, - annotation = {A \texttt{book} entry with a \texttt{translator} field}, - abstract = {The Physics is a work by Aristotle dedicated to the study of - nature. Regarded by Heidegger as "the fundamental work of Western - philosophy", it presents the renowned distinction between the - four types of cause, as well as reflections on chance, motion, - infinity, and other fundamental concepts. It is here that - Aristotle sets out his celebrated paradox of time.}, -} - -@book{book:aristotle_poetic_1968clare#press, - title = {Poetics}, - shorttitle = {Poetics}, - author = {Aristotle}, - location = {Oxford}, - publisher = {Clarendon Press}, - series = {Clarendon {Aristotle}}, - date = {1968}, - editor = {Lucas, D. W.}, - keywords = {primary}, - langid = {english}, - langidopts = {variant=british}, - annotation = {A \texttt{book} entry with an \texttt{author} and an \texttt{ - editor} as well as a \texttt{series} field}, -} - -@mvbook{mvbook:aristotle_rhetor=aristo=with_1877cambr#unive#press, - title = {The \textbf{Rhetoric} of {Aristotle} with a commentary by the late {Edward - Meredith Cope}}, - shorttitle = {Rhetoric}, - author = {Aristotle}, - publisher = cup, - date = {1877}, - editor = {Cope, Edward Meredith}, - commentator = {Cope, Edward Meredith}, - volumes = {3}, - keywords = {primary}, - langid = {english}, - langidopts = {variant=british}, - sorttitle = {Rhetoric of Aristotle}, - indextitle = {Rhetoric of {Aristotle}, The}, - annotation = {A commented edition. 
Note the concatenation of the \texttt{ - editor} and \texttt{commentator} fields as well as the \texttt{ - volumes}, \texttt{sorttitle}, and \texttt{indextitle} fields}, -} - -@book{book:augustine_hetero=cataly=synthe_1995marce#dekke, - title = {Heterogeneous catalysis for the synthetic \textit{chemist}}, - shorttitle = {Heterogeneous catalysis}, - author = {Augustine, Robert L.}, - location = {New York}, - publisher = {Marcel Dekker}, - date = {1995}, - langid = {english}, - langidopts = {variant=american}, - annotation = {A plain \texttt{book} entry}, - keywords = {chemistry}, -} - -@book{book:averroes_epistl=on=possib_1982jewis#theol#semin#ameri, - title = {The Epistle on the Possibility of Conjunction with the Active - Intellect by {Ibn Rushd} with the Commentary of {Moses Narboni}}, - shorttitle = {Possibility of Conjunction}, - author = {Averroes}, - location = {New York}, - publisher = {Jewish Theological Seminary of America}, - series = {Moreshet: Studies in {Jewish} History, Literature and Thought}, - number = {7}, - date = {1982}, - editor = {Bland, Kalman P.}, - translator = {Bland, Kalman P.}, - keywords = {primary}, - langid = {english}, - langidopts = {variant=american}, - indextitle = {Epistle on the Possibility of Conjunction, The}, - annotation = {A \texttt{book} entry with a \texttt{series} and a \texttt{ - number}. Note the concatenation of the \texttt{editor} and - \texttt{translator} fields as well as the \texttt{indextitle} - field}, -} - -@article{article:baez-lauda_higher=algebr=v_2004, - title = {Higher-Dimensional Algebra {V}: 2-Groups}, - author = {Baez, John C. and Lauda, Aaron D.}, - volume = {12}, - pages = {423--491}, - journaltitle = {Theory and Applications of Categories}, - date = {2004}, - version = {3}, - eprint = {math/0307200v3}, - eprinttype = {arxiv}, - langid = {english}, - keywords = {math}, - langidopts = {variant=american}, - annotation = {An \texttt{article} with \texttt{eprint} and \texttt{ - eprinttype} fields. 
Note that the arXiv reference is - transformed into a clickable link if \texttt{hyperref} support - has been enabled. Compare \texttt{baez\slash online}, which is - the same item given as an \texttt{online} entry}, -} - -@article{article:bertram-wentworth_gromov=invari=holomo_1996, - title = {Gromov invariants for holomorphic maps on {Riemann} surfaces}, - shorttitle = {Gromov invariants}, - author = {Bertram, Aaron and Wentworth, Richard}, - volume = {9}, - number = {2}, - pages = {529--571}, - journaltitle = jams, - date = {1996}, - langid = {english}, - langidopts = {variant=american}, - annotation = {An \texttt{article} entry with a \texttt{volume} and a \texttt - {number} field}, -} - -@article{article:doody_heming=style=jakes_1974, - title = {Hemingway's Style and {Jake's} Narration}, - author = {Doody, Terrence}, - year = {1974}, - journal = {The Journal of Narrative Technique}, - volume = {4}, - number = {3}, - pages = {212--225}, - langid = {english}, - langidopts = {variant=american}, - related = {matuz:article:doody_heming=style=jakes_1974}, - relatedstring = {\autocap{e}xcerpt in}, - annotation = {An \texttt{article} entry cited as an excerpt from a \texttt{ - collection} entry. Note the format of the \texttt{related} and - \texttt{relatedstring} fields}, -} - -@article{article:gillies_herder=prepar=goethe_1933, - title = {Herder and the Preparation of {Goethe's} Idea of World Literature}, - author = {Gillies, Alexander}, - series = {newseries}, - volume = {9}, - pages = {46--67}, - journaltitle = {Publications of the English Goethe Society}, - date = {1933}, - langid = {english}, - langidopts = {variant=british}, - annotation = {An \texttt{article} entry with a \texttt{series} and a \texttt - {volume} field. 
Note that format of the \texttt{series} field - in the database file}, -} - -@article{article:glashow_partia=symmet=weak_1961, - title = {Partial Symmetries of Weak Interactions}, - author = {Glashow, Sheldon}, - volume = {22}, - pages = {579--588}, - journaltitle = {Nucl.~Phys.}, - date = {1961}, -} - -@article{article:herrmann-ofele_carboc=carben=as_2006, - title = {A carbocyclic carbene as an efficient catalyst ligand for {C--C} - coupling reactions}, - author = {Herrmann, Wolfgang A. and {\"O}fele, Karl and Schneider, Sabine K. - and Herdtweck, Eberhardt and Hoffmann, Stephan D.}, - volume = {45}, - number = {23}, - pages = {3859--3862}, - journaltitle = anch-ie, - date = {2006}, - indextitle = {Carbocyclic carbene as an efficient catalyst, A}, -} - -@article{article:hostetler-wingate_alkane=gold=cluste_1998, - title = {Alkanethiolate gold cluster molecules with core diameters from 1.5 - to 5.2~{nm}}, - shorttitle = {Alkanethiolate gold cluster molecules}, - author = {Hostetler, Michael J. and Wingate, Julia E. and Zhong, Chuan-Jian - and Harris, Jay E. and Vachet, Richard W. and Clark, Michael R. and - Londono, J. David and Green, Stephen J. and Stokes, Jennifer J. and - Wignall, George D. and Glish, Gary L. and Porter, Marc D. and Evans - , Neal D. and Murray, Royce W.}, - volume = {14}, - number = {1}, - pages = {17--30}, - journaltitle = {Langmuir}, - date = {1998}, - subtitle = {Core and monolayer properties as a function of core size}, - langid = {english}, - langidopts = {variant=american}, - indextitle = {Alkanethiolate gold cluster molecules}, - annotation = {An \texttt{article} entry with \arabic{author} authors. By - default, long author and editor lists are automatically - truncated. This is configurable}, -} - -@article{article:kastenholz-hunenberger_comput=method=ionic_2006, - title = {Computation of methodology\hyphen independent ionic solvation free - energies from molecular simulations}, - author = {Kastenholz, M. A. 
and H{\"u}nenberger, Philippe H.}, - volume = {124}, - doi = {10.1063/1.2172593}, - journaltitle = jchph, - date = {2006}, - subtitle = {{I}. {The} electrostatic potential in molecular liquids}, - eid = {124106}, - langid = {english}, - langidopts = {variant=american}, - indextitle = {Computation of ionic solvation free energies}, - annotation = {An \texttt{article} entry with an \texttt{eid} and a \texttt{ - doi} field. Note that the \textsc{doi} is transformed into a - clickable link if \texttt{hyperref} support has been enabled}, - abstract = {The computation of \texttt{ionic} solvation free energies from atomistic - simulations is a surprisingly difficult problem that has found no - satisfactory solution for more than 15 years. The reason is that - the charging free energies evaluated from such simulations are - affected by very large errors. One of these is related to the - choice of a specific convention for summing up the contributions - of solvent charges to the electrostatic potential in the ionic - cavity, namely, on the basis of point charges within entire - solvent molecules (M scheme) or on the basis of individual point - charges (P scheme). The use of an inappropriate convention may - lead to a charge-independent offset in the calculated potential, - which depends on the details of the summation scheme, on the - quadrupole-moment trace of the solvent molecule, and on the - approximate form used to represent electrostatic interactions in - the system. However, whether the M or P scheme (if any) - represents the appropriate convention is still a matter of - on-going debate. The goal of the present article is to settle - this long-standing controversy by carefully analyzing (both - analytically and numerically) the properties of the electrostatic - potential in molecular liquids (and inside cavities within them). 
- }, -} - -@article{article:sarfraz-razzak_techni=sectio=algori_2002, - title = {Technical section: {An} algorithm for automatic capturing of the - font outlines}, - author = {M. Sarfraz and M. F. A. Razzak}, - year = {2002}, - journal = {Computers and Graphics}, - volume = {26}, - number = {5}, - pages = {795--804}, - issn = {0097-8493}, - annotation = {An \texttt{article} entry with an \texttt{issn} field}, -} - -@article{article:reese_georgi=anglos=diplom_1958, - title = {Georgia in {Anglo-Spanish} Diplomacy, 1736--1739}, - author = {Reese, Trevor R.}, - series = {3}, - volume = {15}, - pages = {168--190}, - journaltitle = {William and Mary Quarterly}, - date = {1958}, - langid = {english}, - langidopts = {variant=american}, - annotation = {An \texttt{article} entry with a \texttt{series} and a \texttt - {volume} field. Note the format of the series. If the value of - the \texttt{series} field is an integer, this number is printed - as an ordinal and the string \enquote*{series} is appended - automatically}, -} - -@article{article:shore_twiceb=once=concei_1991, - title = {Twice-Born, Once Conceived}, - author = {Shore, Bradd}, - series = {newseries}, - volume = {93}, - number = {1}, - pages = {9--27}, - journaltitle = {American Anthropologist}, - date = {1991-03}, - subtitle = {Meaning Construction and Cultural Cognition}, - annotation = {An \texttt{article} entry with \texttt{series}, \texttt{volume - }, and \texttt{number} fields. 
Note the format of the \texttt{ - series} which is a localization key}, -} - -@article{article:sigfridsson-ryde_compar=method=derivi_1998, - title = {Comparison of methods for deriving atomic charges from the - electrostatic potential and moments}, - author = {Sigfridsson, Emma and Ryde, Ulf}, - volume = {19}, - number = {4}, - pages = {377--395}, - doi = {10.1002/(SICI)1096-987X(199803)19:4<377::AID-JCC1>3.0.CO;2-P}, - journaltitle = {Journal of Computational Chemistry}, - date = {1998}, - langid = {english}, - langidopts = {variant=american}, - indextitle = {Methods for deriving atomic charges}, - annotation = {An \texttt{article} entry with \texttt{volume}, \texttt{number - }, and \texttt{doi} fields. Note that the \textsc{doi} is - transformed into a clickable link if \texttt{hyperref} support - has been enabled}, - abstract = {Four methods for deriving partial atomic charges from the - quantum chemical electrostatic potential (CHELP, CHELPG, - Merz-Kollman, and RESP) have been compared and critically - evaluated. It is shown that charges strongly depend on how and - where the potential points are selected. Two alternative methods - are suggested to avoid the arbitrariness in the point-selection - schemes and van der Waals exclusion radii: CHELP-BOW, which also - estimates the charges from the electrostatic potential, but with - potential points that are Boltzmann-weighted after their - occurrence in actual simulations using the energy function of the - program in which the charges will be used, and CHELMO, which - estimates the charges directly from the electrostatic multipole - moments. 
Different criteria for the quality of the charges are - discussed.}, -} - -@article{article:spiegelberg_intent=intent=schola_1969, - title = {\mkbibquote{Intention} und \mkbibquote{Intentionalit{\"a}t} in der - Scholastik, bei Brentano und Husserl}, - shorttitle = {Intention und Intentionalit{\"a}t}, - author = {Spiegelberg, Herbert}, - volume = {29}, - pages = {189--216}, - journaltitle = {Studia Philosophica}, - date = {1969}, - langid = {german}, - sorttitle = {Intention und Intentionalitat in der Scholastik, bei Brentano - und Husserl}, - indexsorttitle = {Intention und Intentionalitat in der Scholastik, bei - Brentano und Husserl}, - annotation = {An \texttt{article} entry. Note the \texttt{sorttitle} and - \texttt{indexsorttitle} fields and the markup of the quotes in - the database file}, -} - -@article{article:springer_mediae=pilgri=routes_1950, - title = {Mediaeval Pilgrim Routes from {Scandinavia} to {Rome}}, - shorttitle = {Mediaeval Pilgrim Routes}, - author = {Springer, Otto}, - volume = {12}, - pages = {92--122}, - journaltitle = {Mediaeval Studies}, - date = {1950}, - langid = {english}, - langidopts = {variant=british}, - annotation = {A plain \texttt{article} entry}, -} - -@article{article:weinberg_model=lepton_1967, - title = {A Model of Leptons}, - author = {Weinberg, Steven}, - volume = {19}, - pages = {1264--1266}, - journaltitle = {Phys.~Rev.~Lett.}, - date = {1967}, -} - -@string{anch-ie = {Angew.~Chem. Int.~Ed.}} - -@string{cup = {Cambridge University Press}} - -@string{dtv = {Deutscher Taschenbuch-Verlag}} - -@string{hup = {Harvard University Press}} - -@string{jams = {J.~Amer. Math. Soc.}} - -@string{jchph = {J.~Chem. Phys.}} - -@string{jomch = {J.~Organomet. 
Chem.}} - -@string{pup = {Princeton University Press}} - -@incollection{incollection:westfahl_true=fronti, - title = {The True Frontier}, - author = {Westfahl, Gary}, - pages = {55--65}, - subtitle = {Confronting and Avoiding the Realities of Space in {American} - Science Fiction Films}, - crossref = {westfahl:frontier}, - langid = {english}, - langidopts = {variant=american}, - indextitle = {True Frontier, The}, - annotation = {A cross-referenced article from a \texttt{collection}. This is - an \texttt{incollection} entry with a \texttt{crossref} field. - Note the \texttt{subtitle} and \texttt{indextitle} fields}, -} - -@article{article:yoon-ryu_pallad=pincer=comple_2006, - title = {Palladium pincer complexes with reduced bond angle strain: - efficient catalysts for the {Heck} reaction}, - author = {Yoon, Myeong S. and Ryu, Dowook and Kim, Jeongryul and Ahn, Kyo - Han}, - volume = {25}, - number = {10}, - pages = {2409--2411}, - journaltitle = {Organometallics}, - date = {2006}, - indextitle = {Palladium pincer complexes}, -} -- cgit v1.2.3 From b582588b642e8a38956df2b573ae2be51f19e082 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Tue, 14 Oct 2025 16:50:31 +0200 Subject: correct some typos --- src/bibiman/citekeys/citekey_utils.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs index 61a1804..b8f5600 100644 --- a/src/bibiman/citekeys/citekey_utils.rs +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -38,11 +38,12 @@ pub(super) fn formatting_help() { ), formatdoc!("{}", "USAGE".bold()), formatdoc!( - "\t{} {} {} {}\n", + "\t{} {} {} {} {}\n", env!("CARGO_PKG_NAME").fg::().bold(), "format-citekeys".bold(), + "[OPTIONS]".bold(), "--source=".bold(), - "--output=".bold() + "[--output=]".bold() ), formatdoc!( " @@ -96,10 +97,10 @@ pub(super) fn formatting_help() { formatdoc!( " \t{} - \tWhen formatting citekeys also rename all PDFs and notefiles - \tfollowing the 
bibiman citekey-basename scheme at the locations - \tset in the config file. This option can break file paths. Try - \twith {} first! + \tWhen this option is set, bibiman will also rename all PDFs and + \tnotefiles following the bibiman citekey-basename scheme at the + \tlocations set in the config file. This option can break file paths. + \tTry with {} first! ", "-u, --update-attachments".fg::().bold(), "--dry-run".bold() -- cgit v1.2.3 From 2dc231247757a9a80b1925ed215f53f54eececa5 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Wed, 15 Oct 2025 07:28:20 +0200 Subject: fix tests, remove unneeded imports, add description --- src/bibiman/citekeys.rs | 6 +++--- src/bibiman/citekeys/citekey_utils.rs | 3 --- src/config.rs | 1 + 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 8f70ab0..fdeed14 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -241,9 +241,9 @@ impl<'a> CitekeyFormatting<'a> { /// That will prevent the replacement longer key parts that equal a full shorter /// key. /// - /// You are **very encouraged** to call this method before `update_file()` to - /// prevent replacing citekeys partly which afterwards wont match the pattern - /// anymore. + /// You are **strongly encouraged** to call this method before `update_file()` + /// or `update_notes_pdfs()` to prevent partial replacement of citekeys, which + /// afterwards won't match the pattern anymore. pub fn rev_sort_new_keys_by_len(mut self) -> Self { self.old_new_keys_map .sort_by(|a, b| b.0.len().cmp(&a.0.len())); diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs index b8f5600..773a2d2 100644 --- a/src/bibiman/citekeys/citekey_utils.rs +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -15,10 +15,7 @@ // along with this program. If not, see . 
///// -use std::sync::LazyLock; - use biblatex::{ChunksExt, Entry, Type}; -use deunicode::deunicode; use indoc::formatdoc; use owo_colors::{ OwoColorize, diff --git a/src/config.rs b/src/config.rs index b8d8b45..47e145c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -501,6 +501,7 @@ mod tests { year_color = "135" [citekey_formatter] + ascii_only = true "#, )?; -- cgit v1.2.3 From db882623358d9141927bd31f6825472f2cdca4b6 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Wed, 15 Oct 2025 08:49:43 +0200 Subject: fix some typos --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3fb81c8..086f64a 100644 --- a/README.md +++ b/README.md @@ -202,7 +202,7 @@ bibiman tests/biblatex-test.bib tests/multi-files/ Beside the TUI `bibiman` can format and replace citekeys. To make use of this feature run the program with the `format-citekeys` subcommand. For more -information on this use `bibiman format-citekeys --help` and the +information use `bibiman format-citekeys --help` and see [docs](./CITEKEYS.md). ## Configuration -- cgit v1.2.3