From 67afd67d4d51a00079269d431a7058fc50750886 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Tue, 7 Oct 2025 15:05:47 +0200 Subject: implement basic citekey formatting: * Reads patterns and parses them. TODO: * **Fully** sanitize Latex macros * Preprocess complex and regularly used fields like `author` * Write changes to original bib file --- src/bibiman.rs | 4 +- src/bibiman/bibisetup.rs | 10 ++- src/bibiman/citekeys.rs | 167 +++++++++++++++++++++++++++++++++++++++++++++++ src/config.rs | 8 +++ 4 files changed, 186 insertions(+), 3 deletions(-) create mode 100644 src/bibiman/citekeys.rs (limited to 'src') diff --git a/src/bibiman.rs b/src/bibiman.rs index c423ce1..3158d73 100644 --- a/src/bibiman.rs +++ b/src/bibiman.rs @@ -40,6 +40,7 @@ use std::result::Result::Ok; use tui_input::Input; pub mod bibisetup; +pub mod citekeys; pub mod entries; pub mod keywords; pub mod search; @@ -88,13 +89,14 @@ pub struct Bibiman { } impl Bibiman { - // Constructs a new instance of [`App`]. + /// Constructs a new instance of [`Bibiman`]. 
pub fn new(args: &mut CLIArgs, cfg: &mut BibiConfig) -> Result { let mut main_bibfiles: Vec = args.pos_args.clone(); if cfg.general.bibfiles.is_some() { main_bibfiles.append(cfg.general.bibfiles.as_mut().unwrap()) }; let main_bibfiles = cliargs::parse_files(main_bibfiles); + // TODO: insert workflow for formatting citekeys let main_biblio = BibiSetup::new(&main_bibfiles, cfg); let tag_list = TagList::new(main_biblio.keyword_list.clone()); let search_struct = BibiSearch::default(); diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs index b3f788c..a83a507 100644 --- a/src/bibiman/bibisetup.rs +++ b/src/bibiman/bibisetup.rs @@ -246,8 +246,14 @@ impl BibiData { } impl BibiSetup { + /// Setup the TUI: + /// * Getting files + /// * Parse files into `biblatex::Bibliography` struct + /// * If wanted, format citekeys + /// * Get citekey vector + /// * Collect all keywords + /// * Build the entry list to be displayed pub fn new(main_bibfiles: &[PathBuf], cfg: &BibiConfig) -> Self { - // TODO: Needs check for config file path as soon as config file is impl Self::check_files(main_bibfiles); let bibfilestring = Self::bibfiles_to_string(main_bibfiles); let bibliography = biblatex::Bibliography::parse(&bibfilestring).unwrap(); @@ -264,7 +270,7 @@ impl BibiSetup { } } - // Check which file format the passed file has + /// Check which file format the passed file has fn check_files(main_bibfiles: &[PathBuf]) { if main_bibfiles.is_empty() { println!( diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs new file mode 100644 index 0000000..4c36e80 --- /dev/null +++ b/src/bibiman/citekeys.rs @@ -0,0 +1,167 @@ +use biblatex::Bibliography; +use color_eyre::eyre::eyre; +use owo_colors::OwoColorize; + +use crate::config::BibiConfig; + +#[derive(Debug, Default, Clone)] +pub(crate) struct CitekeyFormatting { + bib_entries: Bibliography, + fields: Vec, +} + +impl CitekeyFormatting { + /// Start Citekey formatting with building a new instance of `CitekeyFormatting` + /// 
Formatting is processed file by file, because `bibman` can handle + /// multi-file setups. + /// The `Bibliography` inserted will be edited in place with the new citekeys. + /// Thus, in the end the `bib_entries` field will hold the updated `Bibliography` + pub fn new(cfg: &BibiConfig, bib_entries: Bibliography) -> color_eyre::Result { + let fields = cfg.citekey_formatter.fields.clone(); + if fields.is_empty() { + return Err(eyre!( + "To format all citekeys, you need to provide {} values in the config file", + "fields".bold() + )); + } + Ok(Self { + bib_entries, + fields, + }) + } + + pub fn do_formatting(&mut self) { + for entry in self.bib_entries.iter_mut() { + let mut new_citekey = String::new(); + for pattern in self.fields.iter() { + let (field, word_count, char_count, inner_delimiter, trailing_delimiter) = + split_formatting_pat(pattern); + let formatted_field_str = { + let mut formatted_str = String::new(); + let field = entry.get_as::(field).expect(&format!( + "Couldn't find field {}", + field.bold().bright_red() + )); + let mut split_field = field.split_whitespace(); + let mut words_passed = 0; + loop { + if let Some(field_slice) = split_field.next() { + formatted_str = formatted_str + format_word(field_slice, char_count); + words_passed += 1; + if word_count.is_some_and(|count| count == words_passed) { + formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); + break; + } else { + formatted_str = formatted_str + inner_delimiter.unwrap_or("") + } + } else { + formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); + break; + }; + } + formatted_str + }; + new_citekey = new_citekey + &formatted_field_str; + } + entry.key = new_citekey; + } + } +} + +fn preformat_field() {} + +/// Cut of word at char count index if its set +fn format_word(word: &str, count: Option) -> &str { + if let Some(len) = count + && len < word.chars().count() + { + &word[..len] + } else { + word + } +} + +/// Split a formatting pattern of kind +/// `;;;;`, +/// 
e.g.: `title;3;3;_;:` will give `("title", 3, 3, "_", ":")` +fn split_formatting_pat( + pattern: &str, +) -> ( + &str, + Option, + Option, + Option<&str>, + Option<&str>, +) { + let mut splits = pattern.split(';'); + ( + splits + .next() + .expect("Need field value for formatting citekey"), + if let Some(next) = splits.next() + && next.len() > 0 + { + next.parse::().ok() + } else { + None + }, + if let Some(next) = splits.next() + && next.len() > 0 + { + next.parse::().ok() + } else { + None + }, + splits.next(), + splits.next(), + ) +} + +#[cfg(test)] +mod tests { + use biblatex::Bibliography; + use itertools::Itertools; + + use crate::bibiman::citekeys::{CitekeyFormatting, split_formatting_pat}; + + #[test] + fn split_citekey_pattern() { + let pattern = "title;3;5;_;_"; + + assert_eq!( + split_formatting_pat(pattern), + ("title", Some(3), Some(5), Some("_"), Some("_")) + ); + + let pattern = "year"; + + assert_eq!( + split_formatting_pat(pattern), + ("year", None, None, None, None) + ); + + let pattern = "author;1;;;_"; + assert_eq!( + split_formatting_pat(pattern), + ("author", Some(1), None, Some(""), Some("_")) + ); + } + + #[test] + fn format_citekey_test() { + let src = r"@book{tolkien1937, author = {Tolkien}, title = {\enquote{Lord} of the \textbf{Rings}}, year = {1937}}"; + let bibliography = Bibliography::parse(src).unwrap(); + let mut formatting_struct = CitekeyFormatting { + bib_entries: bibliography, + fields: vec![ + "author;1;;-;_".into(), + "title;3;3;_;_".into(), + "year".into(), + ], + }; + formatting_struct.do_formatting(); + let keys = formatting_struct.bib_entries.keys().collect_vec(); + assert_eq!(keys[0], "Tolkien_Lor_of_the_1937"); + assert_eq!(keys[0].to_lowercase(), "tolkien_lor_of_the_1937"); + } +} diff --git a/src/config.rs b/src/config.rs index 00a35b7..78cfef9 100644 --- a/src/config.rs +++ b/src/config.rs @@ -102,6 +102,7 @@ const DEFAULT_CONFIG: &str = r##" pub struct BibiConfig { pub general: General, pub colors: Colors, + pub 
citekey_formatter: CitekeyFormatter, } /// Substruct [general] in config.toml @@ -143,6 +144,11 @@ pub struct Colors { pub year_color: Color, } +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct CitekeyFormatter { + pub fields: Vec, +} + impl Default for BibiConfig { fn default() -> Self { Self { @@ -161,6 +167,7 @@ impl Default for BibiConfig { custom_column: CustomField::Pubtype, }, colors: Self::dark_colors(), + citekey_formatter: CitekeyFormatter { fields: Vec::new() }, } } } @@ -187,6 +194,7 @@ impl BibiConfig { } else { Self::dark_colors() }, + citekey_formatter: CitekeyFormatter { fields: Vec::new() }, } } -- cgit v1.2.3 From a07359a9a1da0c06c040f77158be31b3883b33ac Mon Sep 17 00:00:00 2001 From: lukeflo Date: Wed, 8 Oct 2025 13:49:06 +0200 Subject: refine matching and preformatting of fields for citekey formattin; add case field and enum --- Cross.toml | 6 ++ src/bibiman/citekeys.rs | 136 ++++++++++++++++++++++++++--- src/bibiman/sanitize.rs | 10 ++- src/bibiman/sanitize/optimized_sanitize.rs | 28 +++++- src/config.rs | 24 +++-- 5 files changed, 177 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/Cross.toml b/Cross.toml index e7cd27b..6140bf2 100644 --- a/Cross.toml +++ b/Cross.toml @@ -9,3 +9,9 @@ pre-build = [ "dpkg --add-architecture $CROSS_DEB_ARCH", "apt-get update && apt-get install --assume-yes libssl-dev:$CROSS_DEB_ARCH", ] + +[target.x86_64-unknown-freebsd] +# pre-build = [ +# "dpkg --add-architecture $CROSS_DEB_ARCH", +# "apt-get update && apt-get install --assume-yes libssl-dev:$CROSS_DEB_ARCH", +# ] diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 4c36e80..a304e92 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -1,13 +1,40 @@ -use biblatex::Bibliography; +// bibiman - a TUI for managing BibLaTeX databases +// Copyright (C) 2025 lukeflo +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License 
as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +///// + +use biblatex::{Bibliography, ChunksExt, Entry, Type}; use color_eyre::eyre::eyre; use owo_colors::OwoColorize; +use serde::{Deserialize, Serialize}; + +use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig}; -use crate::config::BibiConfig; +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub(crate) enum CitekeyCase { + #[serde(alias = "uppercase", alias = "upper")] + Upper, + #[serde(alias = "lowercase", alias = "lower")] + Lower, +} #[derive(Debug, Default, Clone)] pub(crate) struct CitekeyFormatting { bib_entries: Bibliography, fields: Vec, + case: Option, } impl CitekeyFormatting { @@ -27,6 +54,7 @@ impl CitekeyFormatting { Ok(Self { bib_entries, fields, + case: cfg.citekey_formatter.case.clone(), }) } @@ -38,17 +66,36 @@ impl CitekeyFormatting { split_formatting_pat(pattern); let formatted_field_str = { let mut formatted_str = String::new(); - let field = entry.get_as::(field).expect(&format!( - "Couldn't find field {}", - field.bold().bright_red() - )); + let field = preformat_field(field, entry); + // let field = if let Ok(val) = entry.get_as::(field) { + // val + // } else { + // eprintln!( + // "Unable to get field {} for entry {}", + // field.bright_red(), + // &entry.key.bold() + // ); + // continue; + // }; + // let field = entry.get_as::(field).expect(&format!( + // "Couldn't find field {}", + // field.bold().bright_red() + // )); let mut split_field = field.split_whitespace(); let mut words_passed = 0; + let word_count 
= if let Some(val) = word_count { + val + } else { + field.split_whitespace().count() + // split_field.size_hint().0 + 1 + }; + dbg!(word_count); loop { if let Some(field_slice) = split_field.next() { formatted_str = formatted_str + format_word(field_slice, char_count); words_passed += 1; - if word_count.is_some_and(|count| count == words_passed) { + // if word_count.is_some_and(|count| count == words_passed) { + if word_count == words_passed { formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); break; } else { @@ -68,7 +115,51 @@ impl CitekeyFormatting { } } -fn preformat_field() {} +/// Preformat some fields which are very common to be used in citekeys +fn preformat_field(field: &str, entry: &mut Entry) -> String { + match field { + "title" => { + sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("NA".into())) + } + "author" => { + if let Ok(authors) = entry.author() { + let mut last_names = String::new(); + for a in authors.iter() { + last_names = last_names + &a.name + " "; + } + dbg!(&last_names); + last_names + } else { + "NA".to_string() + } + } + "year" => { + if let Ok(date) = entry.date() { + date.to_chunks().format_verbatim()[..4].to_string() + } else { + entry.get_as::(field).unwrap_or("NA".into()) + } + } + "subtitle" => { + sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("NA".into())) + } + "editor" => { + if let Ok(editors) = entry.editors() { + let mut last_names = String::new(); + for editortypes in editors.iter() { + for e in editortypes.0.iter() { + last_names = last_names + &e.name + " "; + } + } + last_names + } else { + "NA".to_string() + } + } + "pubtype" | "entrytype" => entry.entry_type.to_string(), + _ => entry.get_as::(field).unwrap_or("Empty".into()), + } +} /// Cut of word at char count index if its set fn format_word(word: &str, count: Option) -> &str { @@ -122,7 +213,7 @@ mod tests { use biblatex::Bibliography; use itertools::Itertools; - use crate::bibiman::citekeys::{CitekeyFormatting, 
split_formatting_pat}; + use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting, split_formatting_pat}; #[test] fn split_citekey_pattern() { @@ -149,19 +240,36 @@ mod tests { #[test] fn format_citekey_test() { - let src = r"@book{tolkien1937, author = {Tolkien}, title = {\enquote{Lord} of the \textbf{Rings}}, year = {1937}}"; + let src = r" + @book{bhambra_colonialism_social_theory_2021, + title = {Colonialism and \textbf{Modern Social Theory}}, + author = {Bhambra, Gurminder K. and Holmwood, John}, + location = {Cambridge and Medford}, + publisher = {Polity Press}, + date = {2021}, + } + "; let bibliography = Bibliography::parse(src).unwrap(); let mut formatting_struct = CitekeyFormatting { bib_entries: bibliography, fields: vec![ - "author;1;;-;_".into(), - "title;3;3;_;_".into(), + "entrytype;;;;:".into(), + "author;;;-;_".into(), + "title;4;3;_;_".into(), + "location;;4;:;_".into(), "year".into(), ], + case: None, }; formatting_struct.do_formatting(); let keys = formatting_struct.bib_entries.keys().collect_vec(); - assert_eq!(keys[0], "Tolkien_Lor_of_the_1937"); - assert_eq!(keys[0].to_lowercase(), "tolkien_lor_of_the_1937"); + assert_eq!( + keys[0], + "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021" + ); + assert_eq!( + keys[0].to_lowercase(), + "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021" + ); } } diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs index 9ccf4c4..8c1cc43 100644 --- a/src/bibiman/sanitize.rs +++ b/src/bibiman/sanitize.rs @@ -26,12 +26,12 @@ use optimized_sanitize::optimized_sanitize; macro_rules! 
optimized_sanitize_bibidata { ($bibidata:expr) => { SanitizedBibiData { - title: optimized_sanitize(&$bibidata.title), + title: optimized_sanitize(false, &$bibidata.title), subtitle: match &$bibidata.subtitle { None => None, - Some(subtitle) => Some(optimized_sanitize(subtitle)), + Some(subtitle) => Some(optimized_sanitize(false, subtitle)), }, - abstract_text: optimized_sanitize(&$bibidata.abstract_text), + abstract_text: optimized_sanitize(false, &$bibidata.abstract_text), } }; } @@ -41,3 +41,7 @@ macro_rules! optimized_sanitize_bibidata { pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData { optimized_sanitize_bibidata!(bibidata) } + +pub fn sanitize_single_string_fully(input: &str) -> String { + optimized_sanitize(true, input) +} diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs index 336cc56..dff4d32 100644 --- a/src/bibiman/sanitize/optimized_sanitize.rs +++ b/src/bibiman/sanitize/optimized_sanitize.rs @@ -31,6 +31,17 @@ static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = ph r"\textsc" => ("", Some("")), }; +static LOOKUP_CLEAR_ALL: phf::Map<&'static str, (&'static str, Option<&'static str>)> = phf_map! 
{ + r"\mkbibquote" => ("", Some("")), + r"\enquote*" => ("", Some("")), + r"\enquote" => ("", Some("")), + r"\hyphen" => ("", None), + r"\textbf" => ("", Some("")), + r"\textit" => ("", Some("")), + r"\texttt" => ("", Some("")), + r"\textsc" => ("", Some("")), +}; + #[derive(Logos, Debug)] enum Token { #[token("{")] @@ -43,7 +54,12 @@ enum Token { ForcedSpace, } -pub fn optimized_sanitize(input_text: &str) -> String { +pub fn optimized_sanitize(clear_all: bool, input_text: &str) -> String { + let lookup = if clear_all { + &LOOKUP_CLEAR_ALL + } else { + &LOOKUP + }; let mut char_counter: usize = 0; let mut contains_macro: bool = false; for char in input_text.chars() { @@ -87,7 +103,7 @@ pub fn optimized_sanitize(input_text: &str) -> String { } Token::LaTeXMacro => { let texmacro = lex.slice(); - if let Some(x) = LOOKUP.get(&texmacro.trim_end()) { + if let Some(x) = lookup.get(&texmacro.trim_end()) { if let Some(end) = x.1 { bc_up = true; counter_actions.insert(bracket_counter + 1, end); @@ -115,11 +131,17 @@ mod tests { #[test] fn check_sanitization() { let result = optimized_sanitize( + false, r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}", ); assert_eq!( "\"Intention\" und \"Intentionen \"sind\" - bibquote-.\"", result - ) + ); + let result = optimized_sanitize( + true, + r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}", + ); + assert_eq!("Intention und Intentionen sind bibquote.", result) } } diff --git a/src/config.rs b/src/config.rs index 78cfef9..8a333e4 100644 --- a/src/config.rs +++ b/src/config.rs @@ -16,21 +16,24 @@ ///// use std::{ - fs::{create_dir_all, File}, - io::{stdin, Write}, + fs::{File, create_dir_all}, + io::{Write, stdin}, path::PathBuf, str::FromStr, }; use color_eyre::{eyre::Result, owo_colors::OwoColorize}; use figment::{ - providers::{Format, Serialized, Toml}, Figment, + providers::{Format, Serialized, Toml}, }; use 
ratatui::style::Color; use serde::{Deserialize, Serialize}; -use crate::{bibiman::bibisetup::CustomField, cliargs::CLIArgs}; +use crate::{ + bibiman::{bibisetup::CustomField, citekeys::CitekeyCase}, + cliargs::CLIArgs, +}; const DEFAULT_CONFIG: &str = r##" # [general] @@ -147,6 +150,7 @@ pub struct Colors { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct CitekeyFormatter { pub fields: Vec, + pub case: Option, } impl Default for BibiConfig { @@ -167,7 +171,10 @@ impl Default for BibiConfig { custom_column: CustomField::Pubtype, }, colors: Self::dark_colors(), - citekey_formatter: CitekeyFormatter { fields: Vec::new() }, + citekey_formatter: CitekeyFormatter { + fields: Vec::new(), + case: None, + }, } } } @@ -194,7 +201,10 @@ impl BibiConfig { } else { Self::dark_colors() }, - citekey_formatter: CitekeyFormatter { fields: Vec::new() }, + citekey_formatter: CitekeyFormatter { + fields: Vec::new(), + case: None, + }, } } @@ -352,8 +362,8 @@ fn select_opener() -> String { #[cfg(test)] mod tests { use figment::{ - providers::{Format, Toml}, Figment, + providers::{Format, Toml}, }; use super::BibiConfig; -- cgit v1.2.3 From 8b858f92da69cfb8fa43ec861cda46eeb6ef4bbe Mon Sep 17 00:00:00 2001 From: lukeflo Date: Wed, 8 Oct 2025 14:39:46 +0200 Subject: case parsing from config, needs to be implemented for citekey struct --- src/bibiman/citekeys.rs | 95 +++++++++++++++++++++++-------------------------- src/config.rs | 8 +++-- 2 files changed, 49 insertions(+), 54 deletions(-) (limited to 'src') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index a304e92..118ae3e 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -23,7 +23,7 @@ use serde::{Deserialize, Serialize}; use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig}; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub(crate) enum CitekeyCase { +pub enum CitekeyCase { #[serde(alias = "uppercase", alias = "upper")] Upper, 
#[serde(alias = "lowercase", alias = "lower")] @@ -44,7 +44,11 @@ impl CitekeyFormatting { /// The `Bibliography` inserted will be edited in place with the new citekeys. /// Thus, in the end the `bib_entries` field will hold the updated `Bibliography` pub fn new(cfg: &BibiConfig, bib_entries: Bibliography) -> color_eyre::Result { - let fields = cfg.citekey_formatter.fields.clone(); + let fields = cfg + .citekey_formatter + .fields + .clone() + .expect("Need to define fields in config to format citekeys"); if fields.is_empty() { return Err(eyre!( "To format all citekeys, you need to provide {} values in the config file", @@ -58,65 +62,54 @@ impl CitekeyFormatting { }) } + /// Process the actual formatting. The citekey of every entry will be updated. pub fn do_formatting(&mut self) { for entry in self.bib_entries.iter_mut() { - let mut new_citekey = String::new(); - for pattern in self.fields.iter() { - let (field, word_count, char_count, inner_delimiter, trailing_delimiter) = - split_formatting_pat(pattern); - let formatted_field_str = { - let mut formatted_str = String::new(); - let field = preformat_field(field, entry); - // let field = if let Ok(val) = entry.get_as::(field) { - // val - // } else { - // eprintln!( - // "Unable to get field {} for entry {}", - // field.bright_red(), - // &entry.key.bold() - // ); - // continue; - // }; - // let field = entry.get_as::(field).expect(&format!( - // "Couldn't find field {}", - // field.bold().bright_red() - // )); - let mut split_field = field.split_whitespace(); - let mut words_passed = 0; - let word_count = if let Some(val) = word_count { - val + entry.key = build_citekey(entry, &self.fields); + } + } +} + +/// Build the citekey from the patterns defined in the config file +fn build_citekey(entry: &Entry, pattern_fields: &[String]) -> String { + let mut new_citekey = String::new(); + for pattern in pattern_fields.iter() { + let (field, word_count, char_count, inner_delimiter, trailing_delimiter) = + 
split_formatting_pat(pattern); + let formatted_field_str = { + let mut formatted_str = String::new(); + let field = preformat_field(field, entry); + let mut split_field = field.split_whitespace(); + let mut words_passed = 0; + let word_count = if let Some(val) = word_count { + val + } else { + field.split_whitespace().count() + }; + loop { + if let Some(field_slice) = split_field.next() { + formatted_str = formatted_str + format_word(field_slice, char_count); + words_passed += 1; + if word_count == words_passed { + formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); + break; } else { - field.split_whitespace().count() - // split_field.size_hint().0 + 1 - }; - dbg!(word_count); - loop { - if let Some(field_slice) = split_field.next() { - formatted_str = formatted_str + format_word(field_slice, char_count); - words_passed += 1; - // if word_count.is_some_and(|count| count == words_passed) { - if word_count == words_passed { - formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); - break; - } else { - formatted_str = formatted_str + inner_delimiter.unwrap_or("") - } - } else { - formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); - break; - }; + formatted_str = formatted_str + inner_delimiter.unwrap_or("") } - formatted_str + } else { + formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); + break; }; - new_citekey = new_citekey + &formatted_field_str; } - entry.key = new_citekey; - } + formatted_str + }; + new_citekey = new_citekey + &formatted_field_str; } + new_citekey } /// Preformat some fields which are very common to be used in citekeys -fn preformat_field(field: &str, entry: &mut Entry) -> String { +fn preformat_field(field: &str, entry: &Entry) -> String { match field { "title" => { sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("NA".into())) diff --git a/src/config.rs b/src/config.rs index 8a333e4..a5df61c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -149,7 +149,7 @@ pub struct Colors { 
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct CitekeyFormatter { - pub fields: Vec, + pub fields: Option>, pub case: Option, } @@ -172,7 +172,7 @@ impl Default for BibiConfig { }, colors: Self::dark_colors(), citekey_formatter: CitekeyFormatter { - fields: Vec::new(), + fields: None, case: None, }, } @@ -202,7 +202,7 @@ impl BibiConfig { Self::dark_colors() }, citekey_formatter: CitekeyFormatter { - fields: Vec::new(), + fields: None, case: None, }, } @@ -400,6 +400,8 @@ mod tests { author_color = "38" title_color = "37" year_color = "135" + + [citekey_formatter] "#, )?; -- cgit v1.2.3 From 952dc94b412ffcff26a59c37f3112079c78058ff Mon Sep 17 00:00:00 2001 From: lukeflo Date: Wed, 8 Oct 2025 22:30:46 +0200 Subject: use vector for old new key pairs --- src/bibiman/citekeys.rs | 64 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 118ae3e..b389da2 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -15,6 +15,12 @@ // along with this program. If not, see . ///// +use std::{ + fs::File, + io::Write, + path::{Path, PathBuf}, +}; + use biblatex::{Bibliography, ChunksExt, Entry, Type}; use color_eyre::eyre::eyre; use owo_colors::OwoColorize; @@ -32,9 +38,11 @@ pub enum CitekeyCase { #[derive(Debug, Default, Clone)] pub(crate) struct CitekeyFormatting { + bibfile_path: PathBuf, bib_entries: Bibliography, fields: Vec, case: Option, + old_new_keys_map: Vec<(String, String)>, } impl CitekeyFormatting { @@ -43,7 +51,11 @@ impl CitekeyFormatting { /// multi-file setups. /// The `Bibliography` inserted will be edited in place with the new citekeys. 
/// Thus, in the end the `bib_entries` field will hold the updated `Bibliography` - pub fn new(cfg: &BibiConfig, bib_entries: Bibliography) -> color_eyre::Result { + pub fn new>( + cfg: &BibiConfig, + path: P, + bib_entries: Bibliography, + ) -> color_eyre::Result { let fields = cfg .citekey_formatter .fields @@ -56,22 +68,39 @@ impl CitekeyFormatting { )); } Ok(Self { + bibfile_path: path.as_ref().to_path_buf(), bib_entries, fields, case: cfg.citekey_formatter.case.clone(), + old_new_keys_map: Vec::new(), }) } /// Process the actual formatting. The citekey of every entry will be updated. pub fn do_formatting(&mut self) { + let mut old_new_keys: Vec<(String, String)> = Vec::new(); for entry in self.bib_entries.iter_mut() { - entry.key = build_citekey(entry, &self.fields); + old_new_keys.push(( + entry.key.clone(), + build_citekey(entry, &self.fields, self.case.as_ref()), + )); } + + self.old_new_keys_map = old_new_keys; + } + + /// Write entries with updated citekeys to bibfile + pub fn update_file(&self) -> color_eyre::Result<()> { + let mut file = File::open(&self.bibfile_path)?; + + file.write_all(self.bib_entries.to_biblatex_string().as_bytes())?; + + Ok(()) } } /// Build the citekey from the patterns defined in the config file -fn build_citekey(entry: &Entry, pattern_fields: &[String]) -> String { +fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String { let mut new_citekey = String::new(); for pattern in pattern_fields.iter() { let (field, word_count, char_count, inner_delimiter, trailing_delimiter) = @@ -105,7 +134,14 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String]) -> String { }; new_citekey = new_citekey + &formatted_field_str; } - new_citekey + if let Some(case_format) = case { + match case_format { + CitekeyCase::Lower => new_citekey.to_lowercase(), + CitekeyCase::Upper => new_citekey.to_uppercase(), + } + } else { + new_citekey + } } /// Preformat some fields which are very common to be used in 
citekeys @@ -203,6 +239,8 @@ fn split_formatting_pat( #[cfg(test)] mod tests { + use std::path::PathBuf; + use biblatex::Bibliography; use itertools::Itertools; @@ -244,6 +282,7 @@ mod tests { "; let bibliography = Bibliography::parse(src).unwrap(); let mut formatting_struct = CitekeyFormatting { + bibfile_path: PathBuf::new(), bib_entries: bibliography, fields: vec![ "entrytype;;;;:".into(), @@ -253,6 +292,7 @@ mod tests { "year".into(), ], case: None, + old_new_keys_map: Vec::new(), }; formatting_struct.do_formatting(); let keys = formatting_struct.bib_entries.keys().collect_vec(); @@ -260,9 +300,23 @@ mod tests { keys[0], "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021" ); + formatting_struct.case = Some(CitekeyCase::Lower); + formatting_struct.do_formatting(); + let keys = formatting_struct.bib_entries.keys().collect_vec(); assert_eq!( - keys[0].to_lowercase(), + keys[0], "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021" ); + // let bib_string = formatting_struct.bib_entries.to_biblatex_string(); + // let new_entry = r" + // @book{book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021, + // title = {Colonialism and \textbf{Modern Social Theory}}, + // author = {Bhambra, Gurminder K. 
and Holmwood, John}, + // location = {Cambridge and Medford}, + // publisher = {Polity Press}, + // date = {2021}, + // } + // "; + // assert_eq!(new_entry, bib_string); } } -- cgit v1.2.3 From 7266a14753ed5d572aeed584b66b07d1b9921ca7 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Thu, 9 Oct 2025 14:28:55 +0200 Subject: rewrite cli parsing; need to implement format-citekeys cli parsing --- src/bibiman/citekeys.rs | 105 ++++++++++++++++++++++++++++++++++++------------ src/cliargs.rs | 52 +++++++++++++++++++----- src/main.rs | 30 +------------- 3 files changed, 124 insertions(+), 63 deletions(-) (limited to 'src') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index b389da2..b7995ac 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -16,7 +16,7 @@ ///// use std::{ - fs::File, + fs::OpenOptions, io::Write, path::{Path, PathBuf}, }; @@ -38,7 +38,9 @@ pub enum CitekeyCase { #[derive(Debug, Default, Clone)] pub(crate) struct CitekeyFormatting { - bibfile_path: PathBuf, + /// bibfile to replace keys at. The optional fields defines a differing + /// output file to write to, otherwise original file will be overwritten. + bibfile_path: (PathBuf, Option), bib_entries: Bibliography, fields: Vec, case: Option, @@ -54,6 +56,7 @@ impl CitekeyFormatting { pub fn new>( cfg: &BibiConfig, path: P, + target: Option

, bib_entries: Bibliography, ) -> color_eyre::Result { let fields = cfg @@ -68,7 +71,10 @@ impl CitekeyFormatting { )); } Ok(Self { - bibfile_path: path.as_ref().to_path_buf(), + bibfile_path: ( + path.as_ref().to_path_buf(), + target.map(|p| p.as_ref().to_path_buf()), + ), bib_entries, fields, case: cfg.citekey_formatter.case.clone(), @@ -77,9 +83,9 @@ impl CitekeyFormatting { } /// Process the actual formatting. The citekey of every entry will be updated. - pub fn do_formatting(&mut self) { + pub fn do_formatting(&mut self) -> &mut Self { let mut old_new_keys: Vec<(String, String)> = Vec::new(); - for entry in self.bib_entries.iter_mut() { + for entry in self.bib_entries.iter() { old_new_keys.push(( entry.key.clone(), build_citekey(entry, &self.fields, self.case.as_ref()), @@ -87,16 +93,47 @@ impl CitekeyFormatting { } self.old_new_keys_map = old_new_keys; + + self } /// Write entries with updated citekeys to bibfile pub fn update_file(&self) -> color_eyre::Result<()> { - let mut file = File::open(&self.bibfile_path)?; + let source_file = self.bibfile_path.0.as_path(); + let target_file = if let Some(path) = &self.bibfile_path.1 { + path + } else { + source_file + }; + let mut content = std::fs::read_to_string(source_file)?; - file.write_all(self.bib_entries.to_biblatex_string().as_bytes())?; + for (old_key, new_key) in self.old_new_keys_map.iter() { + content = content.replace(old_key, new_key); + } + + let mut new_file = OpenOptions::new() + .truncate(true) + .write(true) + .create(true) + .open(target_file)?; + + new_file.write_all(content.as_bytes())?; Ok(()) } + + /// Sort the vector containing old/new citekey pairs by the length of the latter. + /// That will prevent the replacement longer key parts that equal a full shorter + /// key. + /// + /// You are **very encouraged** to call this method before `update_file()` to + /// prevent replacing citekeys partly which afterwards wont match the pattern + /// anymore. 
+ pub fn rev_sort_new_keys_by_len(&mut self) -> &mut Self { + self.old_new_keys_map + .sort_by(|a, b| b.1.len().cmp(&a.1.len())); + self + } } /// Build the citekey from the patterns defined in the config file @@ -272,6 +309,18 @@ mod tests { #[test] fn format_citekey_test() { let src = r" + @article{bos_latex_metadata_and_publishing_workflows_2023, + title = {{LaTeX}, metadata, and publishing workflows}, + author = {Bos, Joppe W. and {McCurley}, Kevin S.}, + year = {2023}, + month = apr, + journal = {arXiv}, + number = {{arXiv}:2301.08277}, + doi = {10.48550/arXiv.2301.08277}, + url = {http://arxiv.org/abs/2301.08277}, + urldate = {2023-08-22}, + note = {type: article}, + } @book{bhambra_colonialism_social_theory_2021, title = {Colonialism and \textbf{Modern Social Theory}}, author = {Bhambra, Gurminder K. and Holmwood, John}, @@ -282,7 +331,7 @@ mod tests { "; let bibliography = Bibliography::parse(src).unwrap(); let mut formatting_struct = CitekeyFormatting { - bibfile_path: PathBuf::new(), + bibfile_path: (PathBuf::new(), None), bib_entries: bibliography, fields: vec![ "entrytype;;;;:".into(), @@ -294,29 +343,35 @@ mod tests { case: None, old_new_keys_map: Vec::new(), }; - formatting_struct.do_formatting(); - let keys = formatting_struct.bib_entries.keys().collect_vec(); + let _ = formatting_struct.do_formatting(); + assert_eq!( + formatting_struct.old_new_keys_map.get(0).unwrap().1, + "article:Bos-McCurley_LaT_met_and_pub_Empt_2023" + ); assert_eq!( - keys[0], + formatting_struct.old_new_keys_map.get(1).unwrap().1, "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021" ); formatting_struct.case = Some(CitekeyCase::Lower); - formatting_struct.do_formatting(); - let keys = formatting_struct.bib_entries.keys().collect_vec(); + let _ = formatting_struct.do_formatting().rev_sort_new_keys_by_len(); + // now the longer citekey is processed first and its in lowercase! 
assert_eq!( - keys[0], + formatting_struct.old_new_keys_map.get(0).unwrap().1, "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021" ); - // let bib_string = formatting_struct.bib_entries.to_biblatex_string(); - // let new_entry = r" - // @book{book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021, - // title = {Colonialism and \textbf{Modern Social Theory}}, - // author = {Bhambra, Gurminder K. and Holmwood, John}, - // location = {Cambridge and Medford}, - // publisher = {Polity Press}, - // date = {2021}, - // } - // "; - // assert_eq!(new_entry, bib_string); + } + + #[test] + fn sorting_appended_citekeys() { + let mut keys: Vec<(String, String)> = vec![ + ("smith2000".into(), "smith_book_2000".into()), + ("smith2000a".into(), "smith_book_2000a".into()), + ("smith2000ab".into(), "smith_book_2000ab".into()), + ]; + keys.sort_by(|a, b| b.1.len().cmp(&a.1.len())); + let mut keys = keys.iter(); + assert_eq!(keys.next().unwrap().1, "smith_book_2000ab"); + assert_eq!(keys.next().unwrap().1, "smith_book_2000a"); + assert_eq!(keys.next().unwrap().1, "smith_book_2000"); } } diff --git a/src/cliargs.rs b/src/cliargs.rs index 082ecda..3b12fc3 100644 --- a/src/cliargs.rs +++ b/src/cliargs.rs @@ -18,20 +18,19 @@ use color_eyre::eyre::Result; use dirs::{config_dir, home_dir}; use lexopt::prelude::*; +use owo_colors::OwoColorize; use owo_colors::colors::css::LightGreen; use owo_colors::colors::*; -use owo_colors::OwoColorize; use std::env; use std::path::PathBuf; use walkdir::WalkDir; use crate::app; +use crate::config::BibiConfig; // struct for CLIArgs #[derive(Debug, Default, Clone)] pub struct CLIArgs { - pub helparg: bool, - pub versionarg: bool, pub pos_args: Vec, pub cfg_path: Option, pub light_theme: bool, @@ -39,7 +38,7 @@ pub struct CLIArgs { } impl CLIArgs { - pub fn parse_args() -> Result { + pub fn parse_args() -> color_eyre::Result<(CLIArgs, BibiConfig)> { let mut args = CLIArgs::default(); let mut parser = lexopt::Parser::from_env(); @@ -52,22 +51,57 @@ 
impl CLIArgs { None }; + // if parser + // .raw_args() + // .is_ok_and(|mut arg| arg.next_if(|a| a == "format-citekeys").is_some()) + // { + // todo!("Format citekeys options"); + // } + while let Some(arg) = parser.next()? { match arg { - Short('h') | Long("help") => args.helparg = true, - Short('v') | Long("version") => args.versionarg = true, + Short('h') | Long("help") => { + println!("{}", help_func()); + std::process::exit(0); + } + Short('v') | Long("version") => { + println!("{}", version_func()); + std::process::exit(0); + } Short('c') | Long("config-file") => args.cfg_path = Some(parser.value()?.parse()?), Long("light-terminal") => args.light_theme = true, Long("pdf-path") => { args.pdf_path = Some(parser.value()?.parse()?); } // Value(pos_arg) => parse_files(&mut args, pos_arg), - Value(pos_arg) => args.pos_args.push(pos_arg.into()), - _ => return Err(arg.unexpected()), + Value(pos_arg) => { + if args.pos_args.is_empty() && pos_arg == "format-citekeys" { + todo!("Write format citekeys function"); + } else { + args.pos_args.push(parser.value()?.into()); + } + } + _ => return Err(arg.unexpected().into()), } } - Ok(args) + if args + .cfg_path + .as_ref() + .is_some_and(|f| !f.try_exists().unwrap() || !f.is_file()) + { + BibiConfig::create_default_config(&args); + } + + let mut cfg = if args.cfg_path.is_some() { + BibiConfig::parse_config(&args)? 
+ } else { + BibiConfig::new(&args) + }; + + cfg.cli_overwrite(&args); + + Ok((args, cfg)) } } diff --git a/src/main.rs b/src/main.rs index c956d7c..58805d5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -31,35 +31,7 @@ pub mod tui; #[tokio::main] async fn main() -> Result<()> { // Parse CLI arguments - let mut parsed_args = CLIArgs::parse_args()?; - - // Print help if -h/--help flag is passed and exit - if parsed_args.helparg { - println!("{}", cliargs::help_func()); - std::process::exit(0); - } - - // Print version if -v/--version flag is passed and exit - if parsed_args.versionarg { - println!("{}", cliargs::version_func()); - std::process::exit(0); - } - - if parsed_args - .cfg_path - .as_ref() - .is_some_and(|f| !f.try_exists().unwrap() || !f.is_file()) - { - BibiConfig::create_default_config(&parsed_args); - } - - let mut cfg = if parsed_args.cfg_path.is_some() { - BibiConfig::parse_config(&parsed_args)? - } else { - BibiConfig::new(&parsed_args) - }; - - cfg.cli_overwrite(&parsed_args); + let (mut parsed_args, mut cfg) = CLIArgs::parse_args()?; init_error_hooks()?; -- cgit v1.2.3 From 669936a8e4ff99012e8b32ae15616f8fe206ab2d Mon Sep 17 00:00:00 2001 From: lukeflo Date: Thu, 9 Oct 2025 16:48:54 +0200 Subject: subcommand test for pure cli operations --- src/cliargs.rs | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/cliargs.rs b/src/cliargs.rs index 3b12fc3..26a07af 100644 --- a/src/cliargs.rs +++ b/src/cliargs.rs @@ -15,7 +15,6 @@ // along with this program. If not, see . 
///// -use color_eyre::eyre::Result; use dirs::{config_dir, home_dir}; use lexopt::prelude::*; use owo_colors::OwoColorize; @@ -41,6 +40,7 @@ impl CLIArgs { pub fn parse_args() -> color_eyre::Result<(CLIArgs, BibiConfig)> { let mut args = CLIArgs::default(); let mut parser = lexopt::Parser::from_env(); + let mut subcommand = None; // Default config args.cfg_path = if config_dir().is_some() { @@ -73,12 +73,22 @@ impl CLIArgs { Long("pdf-path") => { args.pdf_path = Some(parser.value()?.parse()?); } - // Value(pos_arg) => parse_files(&mut args, pos_arg), Value(pos_arg) => { - if args.pos_args.is_empty() && pos_arg == "format-citekeys" { - todo!("Write format citekeys function"); + if args.pos_args.is_empty() { + let value = pos_arg + .into_string() + .unwrap_or_else(|os| os.to_string_lossy().to_string()); + match value.as_str() { + "format-citekeys" => { + subcommand = Some(value); + break; + } + _ => { + args.pos_args.push(value.into()); + } + } } else { - args.pos_args.push(parser.value()?.into()); + args.pos_args.push(pos_arg.into()); } } _ => return Err(arg.unexpected().into()), @@ -88,7 +98,7 @@ impl CLIArgs { if args .cfg_path .as_ref() - .is_some_and(|f| !f.try_exists().unwrap() || !f.is_file()) + .is_some_and(|f| f.try_exists().is_err() || !f.is_file()) { BibiConfig::create_default_config(&args); } @@ -99,6 +109,13 @@ impl CLIArgs { BibiConfig::new(&args) }; + if let Some(cmd) = subcommand { + match cmd.as_str() { + "format-citekeys" => todo!("write citekey formatting"), + _ => {} + } + } + cfg.cli_overwrite(&args); Ok((args, cfg)) -- cgit v1.2.3 From f9548af5c7693edf536b4ad45564a964338e2c2e Mon Sep 17 00:00:00 2001 From: lukeflo Date: Fri, 10 Oct 2025 10:31:45 +0200 Subject: set up citekey formatting cli, reformat help output --- Cargo.lock | 1 + Cargo.toml | 1 + src/bibiman/citekeys.rs | 87 ++++++++++++++++++++++++++++++++++++++- src/cliargs.rs | 106 ++++++++++++++++++++++++++++++++---------------- src/main.rs | 14 ++++--- 5 files changed, 168 insertions(+), 
41 deletions(-) (limited to 'src') diff --git a/Cargo.lock b/Cargo.lock index 49f65b4..a27636e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -107,6 +107,7 @@ dependencies = [ "editor-command", "figment", "futures", + "indoc", "itertools", "lexopt", "logos", diff --git a/Cargo.toml b/Cargo.toml index 098848e..abf1eee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,7 @@ figment = { version = "0.10.19", features = [ "toml", "test" ]} owo-colors = "4.2.2" logos = "0.15.1" phf = { version = "0.13.1", features = ["macros"] } +indoc = "2.0.6" [workspace.metadata.cross.target.aarch64-unknown-linux-gnu] # Install libssl-dev:arm64, see diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index b7995ac..cafd124 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -23,7 +23,12 @@ use std::{ use biblatex::{Bibliography, ChunksExt, Entry, Type}; use color_eyre::eyre::eyre; -use owo_colors::OwoColorize; +use indoc::formatdoc; +use lexopt::Arg::{Long, Short}; +use owo_colors::{ + OwoColorize, + colors::{BrightBlue, Green, White}, +}; use serde::{Deserialize, Serialize}; use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig}; @@ -48,6 +53,30 @@ pub(crate) struct CitekeyFormatting { } impl CitekeyFormatting { + pub(crate) fn parse_citekey_cli( + parser: &mut lexopt::Parser, + cfg: &BibiConfig, + ) -> color_eyre::Result<()> { + let mut formatter = CitekeyFormatting::default(); + + while let Some(arg) = parser.next()? 
{ + match arg { + Short('h') | Long("help") => { + formatting_help(); + return Ok(()); + } + Short('s') | Short('f') | Long("source") | Long("file") => { + formatter.bibfile_path.0 = parser.value()?.into() + } + Short('t') | Short('o') | Long("target") | Long("output") => { + formatter.bibfile_path.1 = Some(parser.value()?.into()) + } + _ => return Err(arg.unexpected().into()), + } + } + + Ok(()) + } /// Start Citekey formatting with building a new instance of `CitekeyFormatting` /// Formatting is processed file by file, because `bibman` can handle /// multi-file setups. @@ -136,6 +165,62 @@ impl CitekeyFormatting { } } +fn formatting_help() { + let help = vec![ + formatdoc!( + "{} {}\n", + env!("CARGO_PKG_NAME").fg::().bold(), + env!("CARGO_PKG_VERSION") + ), + formatdoc!("{}", "USAGE".bold()), + formatdoc!( + "\t{} {} {} {}\n", + env!("CARGO_PKG_NAME").fg::().bold(), + "format-citekeys".bold(), + "--source=".bold(), + "--output=".bold() + ), + formatdoc!( + " + \tThis help describes the CLI usage for the citekey formatting + \tfunctionality of bibiman. The definition of patterns how the + \tcitekeys should be formatted must be set in the config file. + \tFor further informations how to use this patterns etc. see: + \t{} + ", + "https://codeberg.org/lukeflo/bibiman/src/branch/main#bibiman" + .italic() + .fg::() + ), + formatdoc!("{}", "OPTIONS".bold()), + formatdoc!( + " + \t{} + \tShow this help and exit + ", + "-h, --help".fg::().bold() + ), + formatdoc! {" + \t{} + \tThe bibfile for which the citekey formatting should be processed. + \tTakes a path as argument. + ", "-s, -f, --source=, --file=".fg::().bold()}, + formatdoc!( + " + \t{} + \tThe bibfile to which the updated content should be written. + \tTakes a path as argument. If the file doesn't exist, it will be + \tcreated. + \tIf the argument isn't used, the original file will be {}! 
+ ", + "-t, -o, --target=, --output=".fg::().bold(), + "overwritten".italic(), + ), + ]; + let help = help.join("\n"); + println!("{}", help); +} + /// Build the citekey from the patterns defined in the config file fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String { let mut new_citekey = String::new(); diff --git a/src/cliargs.rs b/src/cliargs.rs index 26a07af..e766e77 100644 --- a/src/cliargs.rs +++ b/src/cliargs.rs @@ -16,6 +16,7 @@ ///// use dirs::{config_dir, home_dir}; +use indoc::formatdoc; use lexopt::prelude::*; use owo_colors::OwoColorize; use owo_colors::colors::css::LightGreen; @@ -25,6 +26,7 @@ use std::path::PathBuf; use walkdir::WalkDir; use crate::app; +use crate::bibiman::citekeys::CitekeyFormatting; use crate::config::BibiConfig; // struct for CLIArgs @@ -37,10 +39,16 @@ pub struct CLIArgs { } impl CLIArgs { - pub fn parse_args() -> color_eyre::Result<(CLIArgs, BibiConfig)> { + /// This struct parses the command line and initializes and returns the + /// necessary structs `CLIArgs` and `BibiConfig`. + /// + /// Additionally, it returns a bool which defines if the TUI should be run + /// or not. The latter is the case for pure CLI processes as `format-citekeys`. 
+ pub fn parse_args() -> color_eyre::Result<(CLIArgs, BibiConfig, bool)> { let mut args = CLIArgs::default(); let mut parser = lexopt::Parser::from_env(); let mut subcommand = None; + let mut run_tui = true; // Default config args.cfg_path = if config_dir().is_some() { @@ -81,6 +89,7 @@ impl CLIArgs { match value.as_str() { "format-citekeys" => { subcommand = Some(value); + run_tui = false; break; } _ => { @@ -111,14 +120,16 @@ impl CLIArgs { if let Some(cmd) = subcommand { match cmd.as_str() { - "format-citekeys" => todo!("write citekey formatting"), + "format-citekeys" => { + CitekeyFormatting::parse_citekey_cli(&mut parser, &cfg)?; + } _ => {} } } cfg.cli_overwrite(&args); - Ok((args, cfg)) + Ok((args, cfg, run_tui)) } } @@ -172,14 +183,21 @@ pub fn help_func() -> String { env!("CARGO_PKG_VERSION").fg::(), ), format!( - "{}:\n\t{} [Flags] [files/dirs]\n", + "{}\n\t{} [OPTIONS] [SUBCOMMAND | POSITIONAL ARGUMENTS]\n", "USAGE".bold(), - "bibiman".bold() + env!("CARGO_PKG_NAME").fg::().bold() + ), + formatdoc!( + " + \tYou can either use a {} or {}, not both! + ", + "subcommand".bold(), + "positional arguments".bold() ), format!( - "{}:\n\t{}\t\tPath to {} file", + "{}\n\t{}\t\tPath to {} file", "POSITIONAL ARGUMENTS".bold(), - "".fg::().bold(), + "".fg::().bold(), ".bib".fg::().bold() ), format!( @@ -188,38 +206,58 @@ pub fn help_func() -> String { ".bib".fg::().bold() ), format!("\n\t{}", "Both can be passed multiple times".italic()), - format!("\n{}:", "FLAGS".bold()), - format!("\t{}", "-h, --help".bold().fg::()), - format!("\t\t{}", "Show this help and exit"), - format!("\t{}", "-v, --version".bold().fg::()), - format!("\t\t{}", "Show the version and exit"), - format!("\t{}", "--light-terminal".bold().fg::()), - format!( - "\t\t{}", - "Enable default colors for light terminal background" + format!("\n{}", "SUBCOMMANDS".bold()), + formatdoc!( + " + \t{} + \tRun the citekey formatting procedure on a specified bibfile. 
+ \tFor further infos run {} + ", + "format-citekeys".fg::().bold(), + "bibiman format-citekeys --help".fg::().bold() ), - format!( - "\t{}{}", - "-c, --config-file=".bold().fg::(), - "".bold().italic().fg::() + format!("{}", "OPTIONS".bold()), + formatdoc!( + " + \t{} + \tShow this help and exit + ", + "-h, --help".bold().fg::() ), - format!("\t\t{}", "Path to config file used for current session."), - format!("\t\t{}", "Takes precedence over standard config file."), - format!( - "\t{}{}", - "--pdf-path=".bold().fg::(), - "".bold().italic().fg::() + formatdoc!( + " + \t{} + \tShow the version and exit + ", + "-v, --version".bold().fg::() ), - format!("\t\t{}", "Path to directory containing PDF files."), - format!( - "\t\t{}", - "If the pdf files basename matches an entrys citekey," + formatdoc!( + " + \t{} + \tEnable default colors for light terminal background + ", + "--light-terminal".bold().fg::() ), - format!( - "\t\t{}", - "its attached as connected PDF file for the current session." + formatdoc!( + " + \t{}{} + \tPath to config file used for current session. + \tTakes precedence over standard config file. + ", + "-c, --config-file=".bold().fg::(), + "".bold().italic().fg::() + ), + formatdoc!( + " + \t{}{} + \tPath to directory containing PDF files. + \tIf the pdf files basename matches an entrys citekey, + \tits attached as connected PDF file for the current session. + \tDoes not edit the bibfile itself! 
+ ", + "--pdf-path=".bold().fg::(), + "".bold().italic().fg::() ), - format!("\t\t{}", "Does not edit the bibfile itself!"), ]; let help = help.join("\n"); help diff --git a/src/main.rs b/src/main.rs index 58805d5..e735eb0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,7 +18,6 @@ use app::App; use cliargs::CLIArgs; use color_eyre::eyre::Result; -use config::BibiConfig; use errorsetup::init_error_hooks; pub mod app; @@ -31,13 +30,16 @@ pub mod tui; #[tokio::main] async fn main() -> Result<()> { // Parse CLI arguments - let (mut parsed_args, mut cfg) = CLIArgs::parse_args()?; + let (mut parsed_args, mut cfg, run_tui) = CLIArgs::parse_args()?; - init_error_hooks()?; + if run_tui { + init_error_hooks()?; - // Create an application. - let mut app = App::new(&mut parsed_args, &mut cfg)?; + // Create an application. + let mut app = App::new(&mut parsed_args, &mut cfg)?; + + app.run(&cfg).await?; + } - app.run(&cfg).await?; Ok(()) } -- cgit v1.2.3 From 4779dbc5fe3712bce31bbb5f1f43c28c4c839420 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Fri, 10 Oct 2025 13:47:07 +0200 Subject: substitute byte index for char counting loop, impl `dry-run` option for citekey formatting --- src/bibiman/citekeys.rs | 118 +++++++++++++++++++++++++++++++++++------------- tests/test-config.toml | 4 ++ 2 files changed, 90 insertions(+), 32 deletions(-) (limited to 'src') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index cafd124..5121741 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -22,7 +22,7 @@ use std::{ }; use biblatex::{Bibliography, ChunksExt, Entry, Type}; -use color_eyre::eyre::eyre; +use color_eyre::eyre::{OptionExt, eyre}; use indoc::formatdoc; use lexopt::Arg::{Long, Short}; use owo_colors::{ @@ -50,6 +50,7 @@ pub(crate) struct CitekeyFormatting { fields: Vec, case: Option, old_new_keys_map: Vec<(String, String)>, + dry_run: bool, } impl CitekeyFormatting { @@ -59,12 +60,27 @@ impl CitekeyFormatting { ) -> color_eyre::Result<()> { let mut 
formatter = CitekeyFormatting::default(); + formatter.fields = cfg + .citekey_formatter + .fields + .clone() + .ok_or_eyre("Need to define fields correctly in config file")?; + + formatter.case = cfg.citekey_formatter.case.clone(); + + if formatter.fields.is_empty() { + return Err(eyre!( + "To format all citekeys, you need to provide {} values in the config file", + "fields".bold() + )); + } while let Some(arg) = parser.next()? { match arg { Short('h') | Long("help") => { formatting_help(); return Ok(()); } + Short('d') | Long("dry-run") => formatter.dry_run = true, Short('s') | Short('f') | Long("source") | Long("file") => { formatter.bibfile_path.0 = parser.value()?.into() } @@ -75,6 +91,16 @@ impl CitekeyFormatting { } } + let bibstring = std::fs::read_to_string(&formatter.bibfile_path.0)?; + + formatter.bib_entries = Bibliography::parse(&bibstring) + .map_err(|e| eyre!("Couldn't parse bibfile due to {}", e.kind))?; + + formatter + .do_formatting() + .rev_sort_new_keys_by_len() + .update_file()?; + Ok(()) } /// Start Citekey formatting with building a new instance of `CitekeyFormatting` @@ -108,6 +134,7 @@ impl CitekeyFormatting { fields, case: cfg.citekey_formatter.case.clone(), old_new_keys_map: Vec::new(), + dry_run: false, }) } @@ -122,32 +149,40 @@ impl CitekeyFormatting { } self.old_new_keys_map = old_new_keys; + // dbg!(&self.old_new_keys_map); self } /// Write entries with updated citekeys to bibfile - pub fn update_file(&self) -> color_eyre::Result<()> { - let source_file = self.bibfile_path.0.as_path(); - let target_file = if let Some(path) = &self.bibfile_path.1 { - path + pub fn update_file(&mut self) -> color_eyre::Result<()> { + if self.dry_run { + println!("Following citekeys would be formatted: old => new\n"); + self.old_new_keys_map.sort_by(|a, b| a.0.cmp(&b.0)); + for (old, new) in &self.old_new_keys_map { + println!("{} => {}", old.italic(), new.bold()) + } } else { - source_file - }; - let mut content = std::fs::read_to_string(source_file)?; 
- - for (old_key, new_key) in self.old_new_keys_map.iter() { - content = content.replace(old_key, new_key); - } + let source_file = self.bibfile_path.0.as_path(); + let target_file = if let Some(path) = &self.bibfile_path.1 { + path + } else { + source_file + }; + let mut content = std::fs::read_to_string(source_file)?; - let mut new_file = OpenOptions::new() - .truncate(true) - .write(true) - .create(true) - .open(target_file)?; + for (old_key, new_key) in self.old_new_keys_map.iter() { + content = content.replace(old_key, new_key); + } - new_file.write_all(content.as_bytes())?; + let mut new_file = OpenOptions::new() + .truncate(true) + .write(true) + .create(true) + .open(target_file)?; + new_file.write_all(content.as_bytes())?; + } Ok(()) } @@ -160,7 +195,7 @@ impl CitekeyFormatting { /// anymore. pub fn rev_sort_new_keys_by_len(&mut self) -> &mut Self { self.old_new_keys_map - .sort_by(|a, b| b.1.len().cmp(&a.1.len())); + .sort_by(|a, b| b.0.len().cmp(&a.0.len())); self } } @@ -232,14 +267,20 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey let field = preformat_field(field, entry); let mut split_field = field.split_whitespace(); let mut words_passed = 0; - let word_count = if let Some(val) = word_count { + let field_count = field.split_whitespace().count(); + let word_count = if let Some(val) = word_count + && val <= field_count + { val } else { - field.split_whitespace().count() + field_count }; loop { + if field.is_empty() { + break; + } if let Some(field_slice) = split_field.next() { - formatted_str = formatted_str + format_word(field_slice, char_count); + formatted_str = formatted_str + &format_word(field_slice, char_count); words_passed += 1; if word_count == words_passed { formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); @@ -270,7 +311,7 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey fn preformat_field(field: &str, entry: &Entry) -> String { match field { "title" => 
{ - sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("NA".into())) + sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("".into())) } "author" => { if let Ok(authors) = entry.author() { @@ -278,21 +319,20 @@ fn preformat_field(field: &str, entry: &Entry) -> String { for a in authors.iter() { last_names = last_names + &a.name + " "; } - dbg!(&last_names); last_names } else { - "NA".to_string() + "".to_string() } } "year" => { if let Ok(date) = entry.date() { date.to_chunks().format_verbatim()[..4].to_string() } else { - entry.get_as::(field).unwrap_or("NA".into()) + entry.get_as::(field).unwrap_or("".into()) } } "subtitle" => { - sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("NA".into())) + sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("".into())) } "editor" => { if let Ok(editors) = entry.editors() { @@ -304,22 +344,35 @@ fn preformat_field(field: &str, entry: &Entry) -> String { } last_names } else { - "NA".to_string() + "".to_string() } } "pubtype" | "entrytype" => entry.entry_type.to_string(), - _ => entry.get_as::(field).unwrap_or("Empty".into()), + _ => entry.get_as::(field).unwrap_or("".into()), } } /// Cut of word at char count index if its set -fn format_word(word: &str, count: Option) -> &str { +fn format_word(word: &str, count: Option) -> String { if let Some(len) = count && len < word.chars().count() { - &word[..len] + // Since chars can consist of multiple bytes, we need this more complex + // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...) 
+ // instead of simple byte indexing + let mut word_slice = String::new(); + let word_chars = word.chars(); + let mut counter = 0; + for c in word_chars { + if counter == len { + break; + } + word_slice.push(c); + counter += 1; + } + word_slice } else { - word + word.to_string() } } @@ -427,6 +480,7 @@ mod tests { ], case: None, old_new_keys_map: Vec::new(), + dry_run: false, }; let _ = formatting_struct.do_formatting(); assert_eq!( diff --git a/tests/test-config.toml b/tests/test-config.toml index fc447f1..b484b69 100644 --- a/tests/test-config.toml +++ b/tests/test-config.toml @@ -59,3 +59,7 @@ custom_column = "series" # author_color = "38" # title_color = "37" # year_color = "135" + +[citekey_formatter] +fields = [ "author;2;;-;_", "title;3;3;_;_", "year" ] +case = "lowercase" -- cgit v1.2.3 From c69b1789fabaf149916d160922d7026f2cbe33f1 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Fri, 10 Oct 2025 14:57:53 +0200 Subject: implement const of ignored special chars for citekey formatting * the list contains 33 special chars at the moment * it will only affect already existing special chars in biblatex fields * delimiter specified for citekey formatting are not affected * char count is also not affected, ignored chars are not counted --- src/bibiman/citekeys.rs | 40 +++++++++++++++++++++------------------- src/config.rs | 5 +++++ tests/test-config.toml | 2 +- 3 files changed, 27 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 5121741..7c06886 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -31,7 +31,10 @@ use owo_colors::{ }; use serde::{Deserialize, Serialize}; -use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig}; +use crate::{ + bibiman::sanitize::sanitize_single_string_fully, + config::{BibiConfig, IGNORED_SPECIAL_CHARS}, +}; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum CitekeyCase { @@ -354,26 +357,25 @@ fn 
preformat_field(field: &str, entry: &Entry) -> String { /// Cut of word at char count index if its set fn format_word(word: &str, count: Option) -> String { - if let Some(len) = count - && len < word.chars().count() - { - // Since chars can consist of multiple bytes, we need this more complex - // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...) - // instead of simple byte indexing - let mut word_slice = String::new(); - let word_chars = word.chars(); - let mut counter = 0; - for c in word_chars { - if counter == len { - break; - } - word_slice.push(c); - counter += 1; + // Since chars can consist of multiple bytes, we need this more complex + // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...) + // instead of simple byte indexing + let mut word_slice = String::new(); + let word_chars = word.chars(); + let mut counter = 0; + for c in word_chars { + if let Some(len) = count + && counter == len + { + break; } - word_slice - } else { - word.to_string() + if IGNORED_SPECIAL_CHARS.contains(&c) { + continue; + } + word_slice.push(c); + counter += 1; } + word_slice } /// Split a formatting pattern of kind diff --git a/src/config.rs b/src/config.rs index a5df61c..a4e89be 100644 --- a/src/config.rs +++ b/src/config.rs @@ -35,6 +35,11 @@ use crate::{ cliargs::CLIArgs, }; +pub const IGNORED_SPECIAL_CHARS: [char; 33] = [ + '?', '!', '\\', '\'', '.', '-', '–', ':', ',', '[', ']', '(', ')', '{', '}', '§', '$', '%', + '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"', +]; + const DEFAULT_CONFIG: &str = r##" # [general] ## Default files/dirs which are loaded on startup diff --git a/tests/test-config.toml b/tests/test-config.toml index b484b69..558d216 100644 --- a/tests/test-config.toml +++ b/tests/test-config.toml @@ -61,5 +61,5 @@ custom_column = "series" # year_color = "135" [citekey_formatter] -fields = [ "author;2;;-;_", "title;3;3;_;_", "year" ] +fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ] case = 
"lowercase" -- cgit v1.2.3 From 418d2f3874c8e86c4b58143115ee3d4181130f9c Mon Sep 17 00:00:00 2001 From: lukeflo Date: Fri, 10 Oct 2025 15:09:48 +0200 Subject: add dry-run information to --help function --- src/bibiman/citekeys.rs | 11 +++++++++++ src/config.rs | 15 +++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'src') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 7c06886..f7704fb 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -238,6 +238,17 @@ fn formatting_help() { ", "-h, --help".fg::().bold() ), + formatdoc!( + " + \t{} + \tDon't apply any changes to the named files. Instead print all + \told citekeys and the formatted strings that would have been + \tapplied in the format: {} => {} + ", + "-d, --dry-run".fg::().bold(), + "old_key".italic(), + "new_key".bold() + ), formatdoc! {" \t{} \tThe bibfile for which the citekey formatting should be processed. diff --git a/src/config.rs b/src/config.rs index a4e89be..b1c4b07 100644 --- a/src/config.rs +++ b/src/config.rs @@ -103,6 +103,21 @@ const DEFAULT_CONFIG: &str = r##" # author_color = "38" # title_color = "37" # year_color = "135" + +# [citekey_formatter] +## Define the patterns for creating citekeys. Every item of the array consists of +## five components separated by semicolons. Despite the field name every component +## can be left blank: +## - name of the biblatex field ("author", "title"...) +## - number of max words from the given field +## - number of chars used from each word +## - delimiter to separate words of the same field +## - trailing delimiter separating the current field from the following +# fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ] + +## Convert chars to specified case. Possible values: +## "upper", "uppercase", "lower", "lowercase" +# case = "lowercase" "##; /// Main struct of the config file. 
Contains substructs/headings in toml -- cgit v1.2.3 From 211d556d6dc7132442c90b3ae6ac7485cf30ac5e Mon Sep 17 00:00:00 2001 From: lukeflo Date: Fri, 10 Oct 2025 18:22:26 +0200 Subject: trimming citekey if last field is empty: WIP --- src/bibiman/citekeys.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index f7704fb..1f16b48 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -273,7 +273,8 @@ fn formatting_help() { /// Build the citekey from the patterns defined in the config file fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String { let mut new_citekey = String::new(); - for pattern in pattern_fields.iter() { + let fields = pattern_fields.len(); + for (idx, pattern) in pattern_fields.iter().enumerate() { let (field, word_count, char_count, inner_delimiter, trailing_delimiter) = split_formatting_pat(pattern); let formatted_field_str = { -- cgit v1.2.3 From 49d9a57bd15565116a51380d3552201b4a2de57b Mon Sep 17 00:00:00 2001 From: lukeflo Date: Sun, 12 Oct 2025 14:05:47 +0200 Subject: pop trailing delimiter if last field is empty --- src/bibiman/citekeys.rs | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 1f16b48..065d57f 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -272,14 +272,27 @@ fn formatting_help() { /// Build the citekey from the patterns defined in the config file fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String { + // mut string the citekey is built from let mut new_citekey = String::new(); + + // count different fields of pattern vec let fields = pattern_fields.len(); + + // loop over pattern fields process them for (idx, pattern) in pattern_fields.iter().enumerate() { - let (field, word_count, char_count, 
inner_delimiter, trailing_delimiter) = + // parse single values from pattern field + let (field_name, word_count, char_count, inner_delimiter, trailing_delimiter) = split_formatting_pat(pattern); + + // built the part of the citekey from the current pattern field let formatted_field_str = { let mut formatted_str = String::new(); - let field = preformat_field(field, entry); + + // preformat the field depending on biblatex value + let field = preformat_field(field_name, entry); + + // split at whitespaces, count fields and set counter for processed + // splits let mut split_field = field.split_whitespace(); let mut words_passed = 0; let field_count = field.split_whitespace().count(); @@ -290,10 +303,19 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey } else { field_count }; + + // loop over single parts of current field and add correct delimiter loop { + // terminate loop for current field if its empty. If its also the + // last of the pattern vec, pop the trailing delimiter if field.is_empty() { + if idx + 1 == fields { + let _ = new_citekey.pop(); + } break; } + + // process the single slices and add correct delimiter if let Some(field_slice) = split_field.next() { formatted_str = formatted_str + &format_word(field_slice, char_count); words_passed += 1; -- cgit v1.2.3 From f112c4e13009e5ddfe3cf5c4cbe7f29f832b8553 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Sun, 12 Oct 2025 21:51:21 +0200 Subject: solve double delimiters with empty fields --- src/bibiman/citekeys.rs | 36 ++++++++++++++++++++++++++++++------ tests/test-config.toml | 2 +- 2 files changed, 31 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 065d57f..9d17403 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -317,13 +317,37 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey // process the single slices and add correct delimiter if let 
Some(field_slice) = split_field.next() { - formatted_str = formatted_str + &format_word(field_slice, char_count); - words_passed += 1; - if word_count == words_passed { - formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); - break; + // Create word slice char by char. We need to loop over chars + // instead of a simple bytes index to also catch chars which + // consist of more than one byte (äöüøæ etc...) + let mut word_slice = String::new(); + let word_chars = field_slice.chars(); + let mut counter = 0; + for c in word_chars { + if let Some(len) = char_count + && counter == len + { + break; + } + // if a word slice contains a special char, skip it + if IGNORED_SPECIAL_CHARS.contains(&c) { + continue; + } + word_slice.push(c); + counter += 1; + } + // Don't count empty slices and don't add delimiter to those + if !word_slice.is_empty() { + formatted_str = formatted_str + &word_slice; + words_passed += 1; + if word_count == words_passed { + formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); + break; + } else { + formatted_str = formatted_str + inner_delimiter.unwrap_or(""); + } } else { - formatted_str = formatted_str + inner_delimiter.unwrap_or("") + continue; } } else { formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); diff --git a/tests/test-config.toml b/tests/test-config.toml index 558d216..2c5ac96 100644 --- a/tests/test-config.toml +++ b/tests/test-config.toml @@ -61,5 +61,5 @@ custom_column = "series" # year_color = "135" [citekey_formatter] -fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ] +fields = [ "author;2;;-;_", "title;3;6;_;_", "year", "publisher;;5;#;" ] case = "lowercase" -- cgit v1.2.3 From 0a8805acfb6fbb3d3a8c22f4ccbaf692a73cddfb Mon Sep 17 00:00:00 2001 From: lukeflo Date: Sun, 12 Oct 2025 23:01:17 +0200 Subject: ignore list for words, but need to solve inner delimiter problem for words ignored --- src/bibiman/citekeys.rs | 317 ++------------------------------ 
src/bibiman/citekeys/citekey_utils.rs | 327 ++++++++++++++++++++++++++++++++++ tests/test-config.toml | 7 +- 3 files changed, 348 insertions(+), 303 deletions(-) create mode 100644 src/bibiman/citekeys/citekey_utils.rs (limited to 'src') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 9d17403..2f56947 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -21,27 +21,33 @@ use std::{ path::{Path, PathBuf}, }; -use biblatex::{Bibliography, ChunksExt, Entry, Type}; +use biblatex::Bibliography; use color_eyre::eyre::{OptionExt, eyre}; -use indoc::formatdoc; use lexopt::Arg::{Long, Short}; -use owo_colors::{ - OwoColorize, - colors::{BrightBlue, Green, White}, -}; +use owo_colors::OwoColorize; use serde::{Deserialize, Serialize}; use crate::{ - bibiman::sanitize::sanitize_single_string_fully, - config::{BibiConfig, IGNORED_SPECIAL_CHARS}, + bibiman::citekeys::citekey_utils::{build_citekey, formatting_help}, + config::BibiConfig, }; +mod citekey_utils; + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum CitekeyCase { #[serde(alias = "uppercase", alias = "upper")] Upper, #[serde(alias = "lowercase", alias = "lower")] Lower, + #[serde( + alias = "camel", + alias = "camelcase", + alias = "camel_case", + alias = "uppercamelcase", + alias = "upper_camel_case" + )] + Camel, } #[derive(Debug, Default, Clone)] @@ -203,306 +209,13 @@ impl CitekeyFormatting { } } -fn formatting_help() { - let help = vec![ - formatdoc!( - "{} {}\n", - env!("CARGO_PKG_NAME").fg::().bold(), - env!("CARGO_PKG_VERSION") - ), - formatdoc!("{}", "USAGE".bold()), - formatdoc!( - "\t{} {} {} {}\n", - env!("CARGO_PKG_NAME").fg::().bold(), - "format-citekeys".bold(), - "--source=".bold(), - "--output=".bold() - ), - formatdoc!( - " - \tThis help describes the CLI usage for the citekey formatting - \tfunctionality of bibiman. The definition of patterns how the - \tcitekeys should be formatted must be set in the config file. 
- \tFor further informations how to use this patterns etc. see: - \t{} - ", - "https://codeberg.org/lukeflo/bibiman/src/branch/main#bibiman" - .italic() - .fg::() - ), - formatdoc!("{}", "OPTIONS".bold()), - formatdoc!( - " - \t{} - \tShow this help and exit - ", - "-h, --help".fg::().bold() - ), - formatdoc!( - " - \t{} - \tDon't apply any changes to the named files. Instead print all - \told citekeys and the formatted strings that would have been - \tapplied in the format: {} => {} - ", - "-d, --dry-run".fg::().bold(), - "old_key".italic(), - "new_key".bold() - ), - formatdoc! {" - \t{} - \tThe bibfile for which the citekey formatting should be processed. - \tTakes a path as argument. - ", "-s, -f, --source=, --file=".fg::().bold()}, - formatdoc!( - " - \t{} - \tThe bibfile to which the updated content should be written. - \tTakes a path as argument. If the file doesn't exist, it will be - \tcreated. - \tIf the argument isn't used, the original file will be {}! - ", - "-t, -o, --target=, --output=".fg::().bold(), - "overwritten".italic(), - ), - ]; - let help = help.join("\n"); - println!("{}", help); -} - -/// Build the citekey from the patterns defined in the config file -fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String { - // mut string the citekey is built from - let mut new_citekey = String::new(); - - // count different fields of pattern vec - let fields = pattern_fields.len(); - - // loop over pattern fields process them - for (idx, pattern) in pattern_fields.iter().enumerate() { - // parse single values from pattern field - let (field_name, word_count, char_count, inner_delimiter, trailing_delimiter) = - split_formatting_pat(pattern); - - // built the part of the citekey from the current pattern field - let formatted_field_str = { - let mut formatted_str = String::new(); - - // preformat the field depending on biblatex value - let field = preformat_field(field_name, entry); - - // split at whitespaces, count 
fields and set counter for processed - // splits - let mut split_field = field.split_whitespace(); - let mut words_passed = 0; - let field_count = field.split_whitespace().count(); - let word_count = if let Some(val) = word_count - && val <= field_count - { - val - } else { - field_count - }; - - // loop over single parts of current field and add correct delimiter - loop { - // terminate loop for current field if its empty. If its also the - // last of the pattern vec, pop the trailing delimiter - if field.is_empty() { - if idx + 1 == fields { - let _ = new_citekey.pop(); - } - break; - } - - // process the single slices and add correct delimiter - if let Some(field_slice) = split_field.next() { - // Create word slice char by char. We need to loop over chars - // instead of a simple bytes index to also catch chars which - // consist of more than one byte (äöüøæ etc...) - let mut word_slice = String::new(); - let word_chars = field_slice.chars(); - let mut counter = 0; - for c in word_chars { - if let Some(len) = char_count - && counter == len - { - break; - } - // if a word slice contains a special char, skip it - if IGNORED_SPECIAL_CHARS.contains(&c) { - continue; - } - word_slice.push(c); - counter += 1; - } - // Don't count empty slices and don't add delimiter to those - if !word_slice.is_empty() { - formatted_str = formatted_str + &word_slice; - words_passed += 1; - if word_count == words_passed { - formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); - break; - } else { - formatted_str = formatted_str + inner_delimiter.unwrap_or(""); - } - } else { - continue; - } - } else { - formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); - break; - }; - } - formatted_str - }; - new_citekey = new_citekey + &formatted_field_str; - } - if let Some(case_format) = case { - match case_format { - CitekeyCase::Lower => new_citekey.to_lowercase(), - CitekeyCase::Upper => new_citekey.to_uppercase(), - } - } else { - new_citekey - } -} - -/// Preformat 
some fields which are very common to be used in citekeys -fn preformat_field(field: &str, entry: &Entry) -> String { - match field { - "title" => { - sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("".into())) - } - "author" => { - if let Ok(authors) = entry.author() { - let mut last_names = String::new(); - for a in authors.iter() { - last_names = last_names + &a.name + " "; - } - last_names - } else { - "".to_string() - } - } - "year" => { - if let Ok(date) = entry.date() { - date.to_chunks().format_verbatim()[..4].to_string() - } else { - entry.get_as::(field).unwrap_or("".into()) - } - } - "subtitle" => { - sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("".into())) - } - "editor" => { - if let Ok(editors) = entry.editors() { - let mut last_names = String::new(); - for editortypes in editors.iter() { - for e in editortypes.0.iter() { - last_names = last_names + &e.name + " "; - } - } - last_names - } else { - "".to_string() - } - } - "pubtype" | "entrytype" => entry.entry_type.to_string(), - _ => entry.get_as::(field).unwrap_or("".into()), - } -} - -/// Cut of word at char count index if its set -fn format_word(word: &str, count: Option) -> String { - // Since chars can consist of multiple bytes, we need this more complex - // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...) 
- // instead of simple byte indexing - let mut word_slice = String::new(); - let word_chars = word.chars(); - let mut counter = 0; - for c in word_chars { - if let Some(len) = count - && counter == len - { - break; - } - if IGNORED_SPECIAL_CHARS.contains(&c) { - continue; - } - word_slice.push(c); - counter += 1; - } - word_slice -} - -/// Split a formatting pattern of kind -/// `;;;;`, -/// e.g.: `title;3;3;_;:` will give `("title", 3, 3, "_", ":")` -fn split_formatting_pat( - pattern: &str, -) -> ( - &str, - Option, - Option, - Option<&str>, - Option<&str>, -) { - let mut splits = pattern.split(';'); - ( - splits - .next() - .expect("Need field value for formatting citekey"), - if let Some(next) = splits.next() - && next.len() > 0 - { - next.parse::().ok() - } else { - None - }, - if let Some(next) = splits.next() - && next.len() > 0 - { - next.parse::().ok() - } else { - None - }, - splits.next(), - splits.next(), - ) -} - #[cfg(test)] mod tests { use std::path::PathBuf; use biblatex::Bibliography; - use itertools::Itertools; - - use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting, split_formatting_pat}; - - #[test] - fn split_citekey_pattern() { - let pattern = "title;3;5;_;_"; - - assert_eq!( - split_formatting_pat(pattern), - ("title", Some(3), Some(5), Some("_"), Some("_")) - ); - let pattern = "year"; - - assert_eq!( - split_formatting_pat(pattern), - ("year", None, None, None, None) - ); - - let pattern = "author;1;;;_"; - assert_eq!( - split_formatting_pat(pattern), - ("author", Some(1), None, Some(""), Some("_")) - ); - } + use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting}; #[test] fn format_citekey_test() { diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs new file mode 100644 index 0000000..ee2c849 --- /dev/null +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -0,0 +1,327 @@ +// bibiman - a TUI for managing BibLaTeX databases +// Copyright (C) 2025 lukeflo +// +// This program is free 
software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +///// + +use biblatex::{ChunksExt, Entry, Type}; +use indoc::formatdoc; +use owo_colors::{ + OwoColorize, + colors::{BrightBlue, Green, White}, +}; + +use crate::{ + bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully}, + config::IGNORED_SPECIAL_CHARS, +}; + +const IGNORE_WORDS: [&str; 20] = [ + "the", "a", "an", "of", "for", "in", "at", "to", "and", "der", "die", "das", "ein", "eine", + "eines", "des", "auf", "und", "für", "vor", +]; + +pub(super) fn formatting_help() { + let help = vec![ + formatdoc!( + "{} {}\n", + env!("CARGO_PKG_NAME").fg::().bold(), + env!("CARGO_PKG_VERSION") + ), + formatdoc!("{}", "USAGE".bold()), + formatdoc!( + "\t{} {} {} {}\n", + env!("CARGO_PKG_NAME").fg::().bold(), + "format-citekeys".bold(), + "--source=".bold(), + "--output=".bold() + ), + formatdoc!( + " + \tThis help describes the CLI usage for the citekey formatting + \tfunctionality of bibiman. The definition of patterns how the + \tcitekeys should be formatted must be set in the config file. + \tFor further informations how to use this patterns etc. see: + \t{} + ", + "https://codeberg.org/lukeflo/bibiman/src/branch/main#bibiman" + .italic() + .fg::() + ), + formatdoc!("{}", "OPTIONS".bold()), + formatdoc!( + " + \t{} + \tShow this help and exit + ", + "-h, --help".fg::().bold() + ), + formatdoc!( + " + \t{} + \tDon't apply any changes to the named files. 
Instead print all + \told citekeys and the formatted strings that would have been + \tapplied in the format: {} => {} + ", + "-d, --dry-run".fg::().bold(), + "old_key".italic(), + "new_key".bold() + ), + formatdoc! {" + \t{} + \tThe bibfile for which the citekey formatting should be processed. + \tTakes a path as argument. + ", "-s, -f, --source=, --file=".fg::().bold()}, + formatdoc!( + " + \t{} + \tThe bibfile to which the updated content should be written. + \tTakes a path as argument. If the file doesn't exist, it will be + \tcreated. + \tIf the argument isn't used, the original file will be {}! + ", + "-t, -o, --target=, --output=".fg::().bold(), + "overwritten".italic(), + ), + ]; + let help = help.join("\n"); + println!("{}", help); +} + +/// Build the citekey from the patterns defined in the config file +pub(super) fn build_citekey( + entry: &Entry, + pattern_fields: &[String], + case: Option<&CitekeyCase>, +) -> String { + // mut string the citekey is built from + let mut new_citekey = String::new(); + + // trailing delimiter of previous field + let mut trailing_delimiter: Option<&str> = None; + + // loop over pattern fields process them + 'field_loop: for pattern in pattern_fields.iter() { + // parse single values from pattern field + let (field_name, word_count, char_count, inner_delimiter, cur_trailing_delimiter) = + split_formatting_pat(pattern); + + // built the part of the citekey from the current pattern field + let formatted_field_str = { + let mut formatted_str = String::new(); + + // preformat the field depending on biblatex value + let field = preformat_field(field_name, entry); + + // split at whitespaces, count fields and set counter for processed + // splits + let mut split_field = field.split_whitespace(); + let mut words_passed = 0; + let field_count = field.split_whitespace().count(); + let word_count = if let Some(val) = word_count + && val <= field_count + { + val + } else { + field_count + }; + + // If there is a trailing delimiter from 
the previous field, push it + if let Some(del) = trailing_delimiter { + formatted_str = del.to_string(); + }; + + // If the current field isn't empty, set trailing delimiter for + // upcoming loop repitition. If it's empty, start next run of loop + // directly + if !field.is_empty() { + trailing_delimiter = cur_trailing_delimiter; + } else { + continue 'field_loop; + } + + // loop over single parts of current field and add correct delimiter + 'word_loop: loop { + // process the single slices and add correct delimiter + if let Some(field_slice) = split_field.next() { + // Create word slice char by char. We need to loop over chars + // instead of a simple bytes index to also catch chars which + // consist of more than one byte (äöüøæ etc...) + let mut word_slice = String::new(); + let word_chars = field_slice.chars(); + let mut counter = 0; + 'char_loop: for mut c in word_chars { + // If camelcase is set, force first char of word to uppercase + if counter == 0 && case == Some(&CitekeyCase::Camel) { + c = c.to_ascii_uppercase() + } + if let Some(len) = char_count + && counter == len + { + break 'char_loop; + } + // if a word slice contains a special char, skip it + if IGNORED_SPECIAL_CHARS.contains(&c) { + continue 'char_loop; + } + word_slice.push(c); + counter += 1; + } + // Don't count empty slices and don't add delimiter to those + if !word_slice.is_empty() { + formatted_str = formatted_str + &word_slice; + words_passed += 1; + if word_count == words_passed { + break 'word_loop; + } else { + formatted_str = formatted_str + inner_delimiter.unwrap_or(""); + } + } else { + continue 'word_loop; + } + } else { + break 'word_loop; + }; + } + formatted_str + }; + new_citekey = new_citekey + &formatted_field_str; + } + match case { + Some(CitekeyCase::Lower) => new_citekey.to_lowercase(), + Some(CitekeyCase::Upper) => new_citekey.to_uppercase(), + _ => new_citekey, + } +} + +/// Preformat some fields which are very common to be used in citekeys +pub(super) fn 
preformat_field(field: &str, entry: &Entry) -> String { + match field { + // Sanitize all macro code from string + "title" => { + sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("".into())) + } + // Get author names. Fall back to editors before setting empty string + "author" => { + if let Ok(authors) = entry.author() { + let mut last_names = String::new(); + for a in authors.iter() { + last_names = last_names + &a.name + " "; + } + last_names + } else if let Ok(editors) = entry.editors() { + let mut last_names = String::new(); + for editortypes in editors.iter() { + for e in editortypes.0.iter() { + last_names = last_names + &e.name + " "; + } + } + last_names + } else { + "".to_string() + } + } + // Get year of date field, fallback to year field + "year" => { + if let Ok(date) = entry.date() { + date.to_chunks().format_verbatim()[..4].to_string() + } else { + entry.get_as::(field).unwrap_or("".into()) + } + } + // Sanitize all macro code from string + "subtitle" => { + sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("".into())) + } + "editor" => { + if let Ok(editors) = entry.editors() { + let mut last_names = String::new(); + for editortypes in editors.iter() { + for e in editortypes.0.iter() { + last_names = last_names + &e.name + " "; + } + } + last_names + } else { + "".to_string() + } + } + "pubtype" | "entrytype" => entry.entry_type.to_string(), + _ => entry.get_as::(field).unwrap_or("".into()), + } +} + +/// Split a formatting pattern of kind +/// `;;;;`, +/// e.g.: `title;3;3;_;:` will give `("title", 3, 3, "_", ":")` +pub(super) fn split_formatting_pat( + pattern: &str, +) -> ( + &str, + Option, + Option, + Option<&str>, + Option<&str>, +) { + let mut splits = pattern.split(';'); + ( + splits + .next() + .expect("Need field value for formatting citekey"), + if let Some(next) = splits.next() + && next.len() > 0 + { + next.parse::().ok() + } else { + None + }, + if let Some(next) = splits.next() + && next.len() > 0 + { + 
next.parse::().ok() + } else { + None + }, + splits.next(), + splits.next(), + ) +} + +#[cfg(test)] +mod test { + use crate::bibiman::citekeys::citekey_utils::split_formatting_pat; + + #[test] + fn split_citekey_pattern() { + let pattern = "title;3;5;_;_"; + + assert_eq!( + split_formatting_pat(pattern), + ("title", Some(3), Some(5), Some("_"), Some("_")) + ); + + let pattern = "year"; + + assert_eq!( + split_formatting_pat(pattern), + ("year", None, None, None, None) + ); + + let pattern = "author;1;;;_"; + assert_eq!( + split_formatting_pat(pattern), + ("author", Some(1), None, Some(""), Some("_")) + ); + } +} diff --git a/tests/test-config.toml b/tests/test-config.toml index 2c5ac96..d3e42c5 100644 --- a/tests/test-config.toml +++ b/tests/test-config.toml @@ -61,5 +61,10 @@ custom_column = "series" # year_color = "135" [citekey_formatter] -fields = [ "author;2;;-;_", "title;3;6;_;_", "year", "publisher;;5;#;" ] +fields = ["entrytype;;;;:", "author;2;;-;_", "title;3;6;_;_", "year", "publisher;;5;#;" ] +# fields = [ # CamelCase test +# "author;2;;;", +# "title;5;5;;", +# "year" +# ] case = "lowercase" -- cgit v1.2.3 From 467851007e1861834326deee3116aa88fe839f5a Mon Sep 17 00:00:00 2001 From: lukeflo Date: Mon, 13 Oct 2025 15:45:53 +0200 Subject: Working proof of concept of citekey formatting --- CITEKEYS.md | 215 +++++++++++++++ Cargo.lock | 7 + Cargo.toml | 1 + README.md | 20 ++ src/bibiman/citekeys.rs | 69 +++-- src/bibiman/citekeys/citekey_utils.rs | 105 ++++---- src/config.rs | 69 +++++ tests/biblatex-test-citekeys.bib | 476 ++++++++++++++++++++++++++++++++++ tests/test-config.toml | 5 +- 9 files changed, 889 insertions(+), 78 deletions(-) create mode 100644 CITEKEYS.md create mode 100644 tests/biblatex-test-citekeys.bib (limited to 'src') diff --git a/CITEKEYS.md b/CITEKEYS.md new file mode 100644 index 0000000..912326a --- /dev/null +++ b/CITEKEYS.md @@ -0,0 +1,215 @@ +# Formatting Citekeys + + + +- [Formatting Citekeys](#formatting-citekeys) + - 
[Settings](#settings) + - [Building Patterns](#building-patterns) + - [Ignore Lists and Char Case](#ignore-lists-and-char-case) + - [General Tipps](#general-tipps) + - [Examples](#examples) + + + +`bibiman` offers the possibility to create new citekeys from the fields of +BibLaTeX entries. This is done using an easy but powerful pattern-matching +syntax. + +## Settings + +All settings for the citekey generation have to be configured in the used config +file. The regular path is `XDG_CONFIG_DIR/bibiman/bibiman.toml`. But it can be +set dynamically with the `-c`/`--config=` global option. + +Following values can be set through the config file. A detailed explanation for +all fields follows below: + +```toml +[citekey_formatter] +fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ] +case = "lowercase" +ascii_only = true +ignored_chars = [ + "?", "!", "\\", "\'", ".", "-", "–", ":", ",", "[", "]", "(", ")", "{", "}", "§", "$", "%", "&", "/", "`", "´", "#", "+", "*", "=", "|", "<", ">", "^", "°", "_", "\"", +] +ignored_words = [ + "the", + "a", + "an", + "of", + "for", + "in", + "at", + "to", + "and", + "der", + "die", + "das", + "ein", + "eine", + "eines", + "des", + "auf", + "und", + "für", + "vor", +] +``` + +## Building Patterns + +The main aspect for generating citekeys are the field patterns. They can be set +through an array in the config file where every array-item represents a single +BibLaTeX field to be used for generating a part of the citekey. + +Every field pattern consists of the following five parts separated by +semicolons. The general pattern looks like this (every subfield is explained +below): + +*biblatex field name* **;** *max word count* **;** *max char count* **;** *inner delimiter* **;** *trailing delimiter* + +- **BibLaTeX field**: the first part represents the field name which value + should be used to generate the content part of the citekey. Theoretically, any + BibLaTeX field can be selected by name. 
But there are some fields which are + much more common than others; e.g. `author`, `editor`, `title`, `year`/`date` + or `entrytype`. Those very common fields are preprocessed; meaning that for + instance LaTeX macros are fully stripped from the strings, or that `editor` is + a fallback value for `author` if the latter is empty (however, setting + `editor` explicitly is still possible). Also using `year` will parse the + `date` field too, to ensure a year number. +- **Max Word**: Defines the maximum number of words to be used from the named + field. E.g. if the title consists of five words, and the max counter is set to + `3` only the first three words will be used. +- **Max Chars/Word**: Defines how many chars, counting from the start, of each + word will be used to build the citekey. If for instance the value is set to + `5`, only the first five chars of any word will be used. Thus, "archaeology" + would be stripped down to "archa". +- **Inner Delimiter**: Sets the delimiter char used between words from the + currently named field; e.g. to separate the words of the `title` field. +- **Trailing Delimiter**: Sets the delimiter which separates the current field's + value from the following. This delimiter is only printed if the following + field has some content. + +For example, to use the `title` field, print at most three words and of those +only the first five chars, single words separated by underscore and the whole +field separated by equal sign, insert the following pattern field into the +`fields` array: + +`title;3;5;_;=` + +Except the BibLaTeX field name, all other parts of the pattern can be left +blank. If the field name is the only value set, semicolon delimiters are also +not necessary. But if only one of the following parts should be set, all +delimiters need to be used. E.g. those are both valid: `title` or `title;;;_;=`. +The first would print all words of the title, no matter the length, not +separated by any char. 
The last would also print all words of the title, but +single words separated by underscores and the whole pattern value separated from +the following by an equal sign. This is not valid: `title;;_` since `bibiman` +can't know if the underscore means a delimiter (and which) or the max char +count. + +The pattern array inside the config file takes multiple pattern fields like the + preceding. This allows an elaborated citekey pattern which takes into account +multiple fields. + +## Ignore Lists and Char Case + +Beside the field patterns there are some other options to define how citekeys +should be built. + +`ascii_only=` +: If set to `true`, which is the default, non-ascii chars are mapped to their + ascii equivalent. For example, the German `ä` would be mapped to `a`. The + Turkish `ş` or Greek `σ`/`ς` would be mapped to `s`. If set to `false` all are + kept as they are. But this could lead to errors running LaTeX on the file. + +`case=` +: If used, sets the case of the chars in the citekey. Valid values are + `uppercase`, `lowercase` or `camelcase`. The first two should be clear, the + latter means typical camel case also beginning the *first word* with an + uppercase letter; also referred to as upper camel case or Pascal case. + +`ignored_chars=` +: Defines chars which should be ignored during parsing (meaning they are not printed). + The default list contains 33 special chars and is part of the default config + file (commented out). Be aware, setting this key will completely + overwrite the default list! + +`ignored_words=` +: A list of words which should be ignored parsing field values. The default list + contains about 20 very commonly used words in English and German; like + articles, pronouns or connector words. Like with `ignored_chars` setting this + key will completely overwrite the default list! + +## General Tipps + +- Most importantly: *always use the **`--dry-run`** option first*! 
This will + print a list of old and new values for all citekeys in the file without + changing anything. +- After finding a good overall pattern, *use the `--output=` option* to create a + new file and don't overwrite your existent file. Thus, your original file + isn't broken if the key formatter produces some unwanted output. +- Even though very long patterns are possible, they are not encouraged, since they bloat + the bibfiles. +- The same applies to *too short* patterns; if the pattern is too unspecific, + it bears the risk of producing duplicates (e.g. single author and year only). + But the citekey generator will not check for duplicates! +- It is possible to keep special chars and use them as delimiters. But this + might cause problems with other programs and CLI tools in particular, since many + special chars are reserved for shell operations. For instance, it will very + likely break the note file feature of `bibiman` which doesn't accept many + special chars. + +## Examples + +To make the process more clear a few examples might help. Following bibfile is +assumed: + +```latex +@article{Bos2023, + title = {{LaTeX}, metadata, and publishing workflows}, + author = {Bos, Joppe W. and {McCurley}, Kevin S.}, + year = {2023}, + month = apr, + journal = {arXiv}, + number = {{arXiv}:2301.08277}, + doi = {10.48550/arXiv.2301.08277}, + url = {http://arxiv.org/abs/2301.08277}, + urldate = {2023-08-22}, + note = {type: article}, +} +@book{Bhambra2021, + title = {Colonialism and \textbf{Modern Social Theory}}, + author = {Bhambra, Gurminder K. 
and Holmwood, John}, + location = {Cambridge and Medford}, + publisher = {Polity Press}, + date = {2021}, + +``` + +And the following values set in the config file: + +```toml +fields = [ + # Just print the whole entrytype and a colon as trailing delimiter + "entrytype;;;;:", + # Print all author names in full length, names separated by dash, + # the whole field by underscore + "author;;;-;_", + # Print first 4 words of title, first 3 chars of every word only. Title words + # separated by equal sign, the whole field by underscore + "title;4;3;=;_", + # Print all words of location, but only first 4 chars of every word. Single words + # separated by colon, whole field by underscore + "location;;4;:;_", + # Just print the whole year + "year", +] +case = "lowercase" +ascii_only = true +``` + +The combination of those setting will produce the following citekeys: + +- **`article:bos-mccurley_lat=met=pub=wor_2023`** +- **`book:bhambra-holmwood_col=mod=soc=the_camb:medf_2021`** diff --git a/Cargo.lock b/Cargo.lock index a27636e..0adb4e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -103,6 +103,7 @@ dependencies = [ "biblatex", "color-eyre", "crossterm", + "deunicode", "dirs", "editor-command", "figment", @@ -323,6 +324,12 @@ dependencies = [ "syn", ] +[[package]] +name = "deunicode" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" + [[package]] name = "dirs" version = "5.0.1" diff --git a/Cargo.toml b/Cargo.toml index abf1eee..0c07c51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ owo-colors = "4.2.2" logos = "0.15.1" phf = { version = "0.13.1", features = ["macros"] } indoc = "2.0.6" +deunicode = "1.6.2" [workspace.metadata.cross.target.aarch64-unknown-linux-gnu] # Install libssl-dev:arm64, see diff --git a/README.md b/README.md index 4929509..3fb81c8 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,11 @@ - [Ubuntu/Debian](#ubuntudebian) - [Void 
Linux](#void-linux) - [Usage](#usage) + - [CLI for citekey formatting](#cli-for-citekey-formatting) - [Configuration](#configuration) - [Location of Config File](#location-of-config-file) - [General Configuration](#general-configuration) + - [Citekey formatting](#citekey-formatting) - [Color Configuration](#color-configuration) - [Features](#features) - [Keybindings](#keybindings) @@ -196,6 +198,13 @@ bibman tests/multi-files/ bibiman tests/biblatex-test.bib tests/multi-files/ ``` +### CLI for citekey formatting + +Beside the TUI `bibiman` can format and replace citekeys. To make use of this +feature run the program with the `format-citekeys` subcommand. For more +information on this use `bibiman format-citekeys --help` and the +[docs](./CITEKEYS.md). + ## Configuration ### Location of Config File @@ -268,6 +277,11 @@ note_symbol = "󰧮" ## Possible values are "journaltitle", "organization", "instituion", "publisher" ## and "pubtype" (which is the default) custom_column = "pubtype" + +[citekey_formatter] +fields = [] +ascii_only = true +case = "lowercase" ``` `bibfiles` @@ -326,6 +340,12 @@ custom_column = "pubtype" good advice to use a rather wide terminal window when using a value like `journaltitle`. +### Citekey formatting + +`bibiman` now also offers a citekey generating feature. This enables to reformat +all citekeys based on an elaborated pattern matching syntax. For further +information and examples see the [docs](CITEKEYS.md). + ### Color Configuration Furthermore, it is now possible to customize the colors. The following values 
The following values diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 2f56947..0cec28e 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -29,7 +29,7 @@ use serde::{Deserialize, Serialize}; use crate::{ bibiman::citekeys::citekey_utils::{build_citekey, formatting_help}, - config::BibiConfig, + config::{BibiConfig, IGNORED_SPECIAL_CHARS, IGNORED_WORDS}, }; mod citekey_utils; @@ -60,6 +60,7 @@ pub(crate) struct CitekeyFormatting { case: Option, old_new_keys_map: Vec<(String, String)>, dry_run: bool, + ascii_only: bool, } impl CitekeyFormatting { @@ -69,14 +70,15 @@ impl CitekeyFormatting { ) -> color_eyre::Result<()> { let mut formatter = CitekeyFormatting::default(); - formatter.fields = cfg - .citekey_formatter - .fields - .clone() - .ok_or_eyre("Need to define fields correctly in config file")?; + formatter.fields = cfg.citekey_formatter.fields.clone().ok_or_eyre(format!( + "Need to define {} correctly in config file", + "citekey pattern fields".red() + ))?; formatter.case = cfg.citekey_formatter.case.clone(); + formatter.ascii_only = cfg.citekey_formatter.ascii_only; + if formatter.fields.is_empty() { return Err(eyre!( "To format all citekeys, you need to provide {} values in the config file", @@ -105,13 +107,26 @@ impl CitekeyFormatting { formatter.bib_entries = Bibliography::parse(&bibstring) .map_err(|e| eyre!("Couldn't parse bibfile due to {}", e.kind))?; + let ignored_chars = if let Some(chars) = &cfg.citekey_formatter.ignored_chars { + chars.as_slice() + } else { + IGNORED_SPECIAL_CHARS.as_slice() + }; + + let ignored_words = if let Some(words) = &cfg.citekey_formatter.ignored_words { + words.as_slice() + } else { + &*IGNORED_WORDS.as_slice() + }; + formatter - .do_formatting() + .do_formatting(ignored_chars, ignored_words) .rev_sort_new_keys_by_len() .update_file()?; Ok(()) } + /// Start Citekey formatting with building a new instance of `CitekeyFormatting` /// Formatting is processed file by file, because `bibman` 
can handle /// multi-file setups. @@ -144,16 +159,24 @@ impl CitekeyFormatting { case: cfg.citekey_formatter.case.clone(), old_new_keys_map: Vec::new(), dry_run: false, + ascii_only: cfg.citekey_formatter.ascii_only, }) } /// Process the actual formatting. The citekey of every entry will be updated. - pub fn do_formatting(&mut self) -> &mut Self { + pub fn do_formatting(&mut self, ignored_chars: &[char], ignored_words: &[String]) -> &mut Self { let mut old_new_keys: Vec<(String, String)> = Vec::new(); for entry in self.bib_entries.iter() { old_new_keys.push(( entry.key.clone(), - build_citekey(entry, &self.fields, self.case.as_ref()), + build_citekey( + entry, + &self.fields, + self.case.as_ref(), + self.ascii_only, + ignored_chars, + ignored_words, + ), )); } @@ -215,12 +238,15 @@ mod tests { use biblatex::Bibliography; - use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting}; + use crate::{ + bibiman::citekeys::{CitekeyCase, CitekeyFormatting}, + config::{IGNORED_SPECIAL_CHARS, IGNORED_WORDS}, + }; #[test] fn format_citekey_test() { let src = r" - @article{bos_latex_metadata_and_publishing_workflows_2023, + @article{Bos2023, title = {{LaTeX}, metadata, and publishing workflows}, author = {Bos, Joppe W. and {McCurley}, Kevin S.}, year = {2023}, @@ -232,7 +258,7 @@ mod tests { urldate = {2023-08-22}, note = {type: article}, } - @book{bhambra_colonialism_social_theory_2021, + @book{Bhambra2021, title = {Colonialism and \textbf{Modern Social Theory}}, author = {Bhambra, Gurminder K. 
and Holmwood, John}, location = {Cambridge and Medford}, @@ -247,29 +273,24 @@ mod tests { fields: vec![ "entrytype;;;;:".into(), "author;;;-;_".into(), - "title;4;3;_;_".into(), + "title;4;3;=;_".into(), "location;;4;:;_".into(), "year".into(), ], - case: None, + case: Some(CitekeyCase::Lower), old_new_keys_map: Vec::new(), dry_run: false, + ascii_only: true, }; - let _ = formatting_struct.do_formatting(); + let _ = formatting_struct + .do_formatting(IGNORED_SPECIAL_CHARS.as_slice(), &*IGNORED_WORDS.as_slice()); assert_eq!( formatting_struct.old_new_keys_map.get(0).unwrap().1, - "article:Bos-McCurley_LaT_met_and_pub_Empt_2023" + "article:bos-mccurley_lat=met=pub=wor_2023" ); assert_eq!( formatting_struct.old_new_keys_map.get(1).unwrap().1, - "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021" - ); - formatting_struct.case = Some(CitekeyCase::Lower); - let _ = formatting_struct.do_formatting().rev_sort_new_keys_by_len(); - // now the longer citekey is processed first and its in lowercase! 
- assert_eq!( - formatting_struct.old_new_keys_map.get(0).unwrap().1, - "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021" + "book:bhambra-holmwood_col=mod=soc=the_camb:medf_2021" ); } diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs index ee2c849..5f70224 100644 --- a/src/bibiman/citekeys/citekey_utils.rs +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -16,21 +16,14 @@ ///// use biblatex::{ChunksExt, Entry, Type}; +use deunicode::deunicode; use indoc::formatdoc; use owo_colors::{ OwoColorize, colors::{BrightBlue, Green, White}, }; -use crate::{ - bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully}, - config::IGNORED_SPECIAL_CHARS, -}; - -const IGNORE_WORDS: [&str; 20] = [ - "the", "a", "an", "of", "for", "in", "at", "to", "and", "der", "die", "das", "ein", "eine", - "eines", "des", "auf", "und", "für", "vor", -]; +use crate::bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully}; pub(super) fn formatting_help() { let help = vec![ @@ -104,6 +97,9 @@ pub(super) fn build_citekey( entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>, + ascii_only: bool, + ignored_chars: &[char], + ignored_words: &[String], ) -> String { // mut string the citekey is built from let mut new_citekey = String::new(); @@ -114,7 +110,7 @@ pub(super) fn build_citekey( // loop over pattern fields process them 'field_loop: for pattern in pattern_fields.iter() { // parse single values from pattern field - let (field_name, word_count, char_count, inner_delimiter, cur_trailing_delimiter) = + let (field_name, max_words, max_chars, inner_delimiter, cur_trailing_delimiter) = split_formatting_pat(pattern); // built the part of the citekey from the current pattern field @@ -126,16 +122,9 @@ pub(super) fn build_citekey( // split at whitespaces, count fields and set counter for processed // splits - let mut split_field = field.split_whitespace(); + let split_field = field.split_whitespace(); let mut 
words_passed = 0; let field_count = field.split_whitespace().count(); - let word_count = if let Some(val) = word_count - && val <= field_count - { - val - } else { - field_count - }; // If there is a trailing delimiter from the previous field, push it if let Some(del) = trailing_delimiter { @@ -152,47 +141,57 @@ pub(super) fn build_citekey( } // loop over single parts of current field and add correct delimiter - 'word_loop: loop { - // process the single slices and add correct delimiter - if let Some(field_slice) = split_field.next() { - // Create word slice char by char. We need to loop over chars - // instead of a simple bytes index to also catch chars which - // consist of more than one byte (äöüøæ etc...) - let mut word_slice = String::new(); - let word_chars = field_slice.chars(); - let mut counter = 0; - 'char_loop: for mut c in word_chars { - // If camelcase is set, force first char of word to uppercase - if counter == 0 && case == Some(&CitekeyCase::Camel) { - c = c.to_ascii_uppercase() - } - if let Some(len) = char_count - && counter == len - { - break 'char_loop; - } - // if a word slice contains a special char, skip it - if IGNORED_SPECIAL_CHARS.contains(&c) { - continue 'char_loop; - } + // process the single slices and add correct delimiter + 'word_loop: for (idx, field_slice) in split_field.enumerate() { + // if the current slice is a common word from the ignore list, + // skip it. + if ignored_words.contains(&field_slice.to_lowercase()) { + continue; + } + + // Create word slice char by char. We need to loop over chars + // instead of a simple bytes index to also catch chars which + // consist of more than one byte (äöüøæ etc...) 
+ let mut word_slice = String::new(); + let word_chars = field_slice.chars(); + let mut counter = 0; + 'char_loop: for mut c in word_chars { + // If camelcase is set, force first char of word to uppercase + if counter == 0 && case == Some(&CitekeyCase::Camel) { + c = c.to_ascii_uppercase() + } + if let Some(len) = max_chars + && counter >= len + { + break 'char_loop; + } + // if a word slice contains a special char, skip it + if ignored_chars.contains(&c) { + continue 'char_loop; + } + // if non-ascii chars should be mapped, check if needed and do it + if let Some(chars) = deunicode::deunicode_char(c) + && ascii_only + { + word_slice.push_str(chars); + counter += chars.len(); + } else { word_slice.push(c); counter += 1; } - // Don't count empty slices and don't add delimiter to those - if !word_slice.is_empty() { - formatted_str = formatted_str + &word_slice; - words_passed += 1; - if word_count == words_passed { - break 'word_loop; - } else { - formatted_str = formatted_str + inner_delimiter.unwrap_or(""); - } + } + // Don't count empty slices and don't add delimiter to those + if !word_slice.is_empty() { + formatted_str = formatted_str + &word_slice; + words_passed += 1; + if max_words.is_some_and(|max| max == words_passed) || idx + 1 == field_count { + break 'word_loop; } else { - continue 'word_loop; + formatted_str = formatted_str + inner_delimiter.unwrap_or(""); } } else { - break 'word_loop; - }; + continue 'word_loop; + } } formatted_str }; diff --git a/src/config.rs b/src/config.rs index b1c4b07..7c1a0f8 100644 --- a/src/config.rs +++ b/src/config.rs @@ -20,6 +20,7 @@ use std::{ io::{Write, stdin}, path::PathBuf, str::FromStr, + sync::LazyLock, }; use color_eyre::{eyre::Result, owo_colors::OwoColorize}; @@ -40,6 +41,31 @@ pub const IGNORED_SPECIAL_CHARS: [char; 33] = [ '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"', ]; +pub static IGNORED_WORDS: LazyLock> = LazyLock::new(|| { + vec![ + String::from("the"), + String::from("a"), + 
String::from("an"), + String::from("of"), + String::from("for"), + String::from("in"), + String::from("at"), + String::from("to"), + String::from("and"), + String::from("der"), + String::from("die"), + String::from("das"), + String::from("ein"), + String::from("eine"), + String::from("eines"), + String::from("des"), + String::from("auf"), + String::from("und"), + String::from("für"), + String::from("vor"), + ] +}); + const DEFAULT_CONFIG: &str = r##" # [general] ## Default files/dirs which are loaded on startup @@ -118,6 +144,40 @@ const DEFAULT_CONFIG: &str = r##" ## Convert chars to specified case. Possible values: ## "upper", "uppercase", "lower", "lowercase" # case = "lowercase" + +## Map all unicode chars to their pure ascii equivalent +# ascii_only = true + +## List of special chars that'll be ignored when building citekeys. +## A custom list will overwrite the default list +# ignored_chars = [ +# "?", "!", "\\", "\'", ".", "-", "–", ":", ",", "[", "]", "(", ")", "{", "}", "§", "$", "%", "&", "/", "`", "´", "#", "+", "*", "=", "|", "<", ">", "^", "°", "_", """, +# ] + +## List of words that'll be ignored when building citekeys. +## A custom list will overwrite the default list +# ignored_words = [ +# "the", +# "a", +# "an", +# "of", +# "for", +# "in", +# "at", +# "to", +# "and", +# "der", +# "die", +# "das", +# "ein", +# "eine", +# "eines", +# "des", +# "auf", +# "und", +# "für", +# "vor", +# ] "##; /// Main struct of the config file. 
Contains substructs/headings in toml @@ -171,6 +231,9 @@ pub struct Colors { pub struct CitekeyFormatter { pub fields: Option>, pub case: Option, + pub ascii_only: bool, + pub ignored_chars: Option>, + pub ignored_words: Option>, } impl Default for BibiConfig { @@ -194,6 +257,9 @@ impl Default for BibiConfig { citekey_formatter: CitekeyFormatter { fields: None, case: None, + ascii_only: true, + ignored_chars: None, + ignored_words: None, }, } } @@ -224,6 +290,9 @@ impl BibiConfig { citekey_formatter: CitekeyFormatter { fields: None, case: None, + ascii_only: true, + ignored_chars: None, + ignored_words: None, }, } } diff --git a/tests/biblatex-test-citekeys.bib b/tests/biblatex-test-citekeys.bib new file mode 100644 index 0000000..9767f97 --- /dev/null +++ b/tests/biblatex-test-citekeys.bib @@ -0,0 +1,476 @@ +@set{set, + entryset = {article:herrmann-ofele_carboc=carben=as_2006,article:aksin-turkmen_effect=immobi=on_2006,article:yoon-ryu_pallad=pincer=comple_2006}, + annotation = {A \texttt{set} with three members.}, +} + +@set{set, + entryset = {article:glashow_partia=symmet=weak_1961,article:weinberg_model=lepton_1967,salam}, + annotation = {A \texttt{set} with three members discussing the standard + model of particle physics.}, +} + +@collection{collection:matuz-miller_contem=litera=critic_1990gale, + title = {Contemporary Literary Criticism}, + year = {1990}, + location = {Detroit}, + publisher = {Gale}, + volume = {61}, + pages = {204--208}, + editor = {Matuz, Roger and Miller, Helen}, + keywords = {narration}, + langid = {english}, + langidopts = {variant=american}, + annotation = {A \texttt{collection} entry providing the excerpt information + for the \texttt{article:doody_heming=style=jakes_1974} entry. 
Note the format of the \texttt{ + pages} field}, +} + +@article{article:aksin-turkmen_effect=immobi=on_2006, + title = {Effect of immobilization on catalytic characteristics of saturated + {Pd-N}-heterocyclic carbenes in {Mizoroki-Heck} reactions}, + author = {Aks{\i}n, {\"O}zge and T{\"u}rkmen, Hayati and Artok , Levent and + { \c{C}}etinkaya, Bekir and Ni, Chaoying and B{\" u}y{ \"u}kg{\"u} + ng{ \" o}r, Orhan and {\"O}zkal, Erhan}, + volume = {691}, + number = {13}, + pages = {3027--3036}, + journaltitle = jomch, + date = {2006}, + indextitle = {Effect of immobilization on catalytic characteristics}, +} + +@article{article:angenendt_honore=salvat=vom_2002, + title = {In Honore Salvatoris~-- Vom Sinn und Unsinn der Patrozinienkunde}, + shorttitle = {In Honore Salvatoris}, + author = {Angenendt, Arnold}, + volume = {97}, + pages = {431--456, 791--823}, + journaltitle = {Revue d'Histoire Eccl{\'e}siastique}, + date = {2002}, + langid = {german}, + indextitle = {In Honore Salvatoris}, + annotation = {A German article in a French journal. Apart from that, a + typical \texttt{article} entry. Note the \texttt{indextitle} + field}, +} + +@book{book:aristotle_de=anima_1907cambr#unive#press, + title = {De Anima}, + author = {Aristotle}, + location = {Cambridge}, + publisher = cup, + date = {1907}, + editor = {Hicks, Robert Drew}, + keywords = {primary, ancient, philosophy, athens}, + langid = {english}, + langidopts = {variant=british}, + annotation = {A \texttt{book} entry with an \texttt{author} and an \texttt{ + editor}}, +} + +@book{book:aristotle_physic_1929g#p#putna, + title = {Physics}, + shorttitle = {Physics}, + author = {Aristotle}, + location = {New York}, + publisher = {G. P. Putnam}, + url = {https://www.infobooks.org/authors/classic/aristotle-books/#Physic}, + date = {1929}, + translator = {Wicksteed, P. H. and Cornford, F. 
M.}, + keywords = {primary, ancient, philosophy}, + langid = {english}, + langidopts = {variant=american}, + file = {~/Documents/coding/projects/bibiman/tests/book:aristotle_physic_1929g#p#putna.pdf}, + annotation = {A \texttt{book} entry with a \texttt{translator} field}, + abstract = {The Physics is a work by Aristotle dedicated to the study of + nature. Regarded by Heidegger as "the fundamental work of Western + philosophy", it presents the renowned distinction between the + four types of cause, as well as reflections on chance, motion, + infinity, and other fundamental concepts. It is here that + Aristotle sets out his celebrated paradox of time.}, +} + +@book{book:aristotle_poetic_1968clare#press, + title = {Poetics}, + shorttitle = {Poetics}, + author = {Aristotle}, + location = {Oxford}, + publisher = {Clarendon Press}, + series = {Clarendon {Aristotle}}, + date = {1968}, + editor = {Lucas, D. W.}, + keywords = {primary}, + langid = {english}, + langidopts = {variant=british}, + annotation = {A \texttt{book} entry with an \texttt{author} and an \texttt{ + editor} as well as a \texttt{series} field}, +} + +@mvbook{mvbook:aristotle_rhetor=aristo=with_1877cambr#unive#press, + title = {The \textbf{Rhetoric} of {Aristotle} with a commentary by the late {Edward + Meredith Cope}}, + shorttitle = {Rhetoric}, + author = {Aristotle}, + publisher = cup, + date = {1877}, + editor = {Cope, Edward Meredith}, + commentator = {Cope, Edward Meredith}, + volumes = {3}, + keywords = {primary}, + langid = {english}, + langidopts = {variant=british}, + sorttitle = {Rhetoric of Aristotle}, + indextitle = {Rhetoric of {Aristotle}, The}, + annotation = {A commented edition. 
Note the concatenation of the \texttt{ + editor} and \texttt{commentator} fields as well as the \texttt{ + volumes}, \texttt{sorttitle}, and \texttt{indextitle} fields}, +} + +@book{book:augustine_hetero=cataly=synthe_1995marce#dekke, + title = {Heterogeneous catalysis for the synthetic \textit{chemist}}, + shorttitle = {Heterogeneous catalysis}, + author = {Augustine, Robert L.}, + location = {New York}, + publisher = {Marcel Dekker}, + date = {1995}, + langid = {english}, + langidopts = {variant=american}, + annotation = {A plain \texttt{book} entry}, + keywords = {chemistry}, +} + +@book{book:averroes_epistl=on=possib_1982jewis#theol#semin#ameri, + title = {The Epistle on the Possibility of Conjunction with the Active + Intellect by {Ibn Rushd} with the Commentary of {Moses Narboni}}, + shorttitle = {Possibility of Conjunction}, + author = {Averroes}, + location = {New York}, + publisher = {Jewish Theological Seminary of America}, + series = {Moreshet: Studies in {Jewish} History, Literature and Thought}, + number = {7}, + date = {1982}, + editor = {Bland, Kalman P.}, + translator = {Bland, Kalman P.}, + keywords = {primary}, + langid = {english}, + langidopts = {variant=american}, + indextitle = {Epistle on the Possibility of Conjunction, The}, + annotation = {A \texttt{book} entry with a \texttt{series} and a \texttt{ + number}. Note the concatenation of the \texttt{editor} and + \texttt{translator} fields as well as the \texttt{indextitle} + field}, +} + +@article{article:baez-lauda_higher=algebr=v_2004, + title = {Higher-Dimensional Algebra {V}: 2-Groups}, + author = {Baez, John C. and Lauda, Aaron D.}, + volume = {12}, + pages = {423--491}, + journaltitle = {Theory and Applications of Categories}, + date = {2004}, + version = {3}, + eprint = {math/0307200v3}, + eprinttype = {arxiv}, + langid = {english}, + keywords = {math}, + langidopts = {variant=american}, + annotation = {An \texttt{article} with \texttt{eprint} and \texttt{ + eprinttype} fields. 
Note that the arXiv reference is + transformed into a clickable link if \texttt{hyperref} support + has been enabled. Compare \texttt{baez\slash online}, which is + the same item given as an \texttt{online} entry}, +} + +@article{article:bertram-wentworth_gromov=invari=holomo_1996, + title = {Gromov invariants for holomorphic maps on {Riemann} surfaces}, + shorttitle = {Gromov invariants}, + author = {Bertram, Aaron and Wentworth, Richard}, + volume = {9}, + number = {2}, + pages = {529--571}, + journaltitle = jams, + date = {1996}, + langid = {english}, + langidopts = {variant=american}, + annotation = {An \texttt{article} entry with a \texttt{volume} and a \texttt + {number} field}, +} + +@article{article:doody_heming=style=jakes_1974, + title = {Hemingway's Style and {Jake's} Narration}, + author = {Doody, Terrence}, + year = {1974}, + journal = {The Journal of Narrative Technique}, + volume = {4}, + number = {3}, + pages = {212--225}, + langid = {english}, + langidopts = {variant=american}, + related = {matuz:article:doody_heming=style=jakes_1974}, + relatedstring = {\autocap{e}xcerpt in}, + annotation = {An \texttt{article} entry cited as an excerpt from a \texttt{ + collection} entry. Note the format of the \texttt{related} and + \texttt{relatedstring} fields}, +} + +@article{article:gillies_herder=prepar=goethe_1933, + title = {Herder and the Preparation of {Goethe's} Idea of World Literature}, + author = {Gillies, Alexander}, + series = {newseries}, + volume = {9}, + pages = {46--67}, + journaltitle = {Publications of the English Goethe Society}, + date = {1933}, + langid = {english}, + langidopts = {variant=british}, + annotation = {An \texttt{article} entry with a \texttt{series} and a \texttt + {volume} field. 
Note that format of the \texttt{series} field + in the database file}, +} + +@article{article:glashow_partia=symmet=weak_1961, + title = {Partial Symmetries of Weak Interactions}, + author = {Glashow, Sheldon}, + volume = {22}, + pages = {579--588}, + journaltitle = {Nucl.~Phys.}, + date = {1961}, +} + +@article{article:herrmann-ofele_carboc=carben=as_2006, + title = {A carbocyclic carbene as an efficient catalyst ligand for {C--C} + coupling reactions}, + author = {Herrmann, Wolfgang A. and {\"O}fele, Karl and Schneider, Sabine K. + and Herdtweck, Eberhardt and Hoffmann, Stephan D.}, + volume = {45}, + number = {23}, + pages = {3859--3862}, + journaltitle = anch-ie, + date = {2006}, + indextitle = {Carbocyclic carbene as an efficient catalyst, A}, +} + +@article{article:hostetler-wingate_alkane=gold=cluste_1998, + title = {Alkanethiolate gold cluster molecules with core diameters from 1.5 + to 5.2~{nm}}, + shorttitle = {Alkanethiolate gold cluster molecules}, + author = {Hostetler, Michael J. and Wingate, Julia E. and Zhong, Chuan-Jian + and Harris, Jay E. and Vachet, Richard W. and Clark, Michael R. and + Londono, J. David and Green, Stephen J. and Stokes, Jennifer J. and + Wignall, George D. and Glish, Gary L. and Porter, Marc D. and Evans + , Neal D. and Murray, Royce W.}, + volume = {14}, + number = {1}, + pages = {17--30}, + journaltitle = {Langmuir}, + date = {1998}, + subtitle = {Core and monolayer properties as a function of core size}, + langid = {english}, + langidopts = {variant=american}, + indextitle = {Alkanethiolate gold cluster molecules}, + annotation = {An \texttt{article} entry with \arabic{author} authors. By + default, long author and editor lists are automatically + truncated. This is configurable}, +} + +@article{article:kastenholz-hunenberger_comput=method=ionic_2006, + title = {Computation of methodology\hyphen independent ionic solvation free + energies from molecular simulations}, + author = {Kastenholz, M. A. 
and H{\"u}nenberger, Philippe H.}, + volume = {124}, + doi = {10.1063/1.2172593}, + journaltitle = jchph, + date = {2006}, + subtitle = {{I}. {The} electrostatic potential in molecular liquids}, + eid = {124106}, + langid = {english}, + langidopts = {variant=american}, + indextitle = {Computation of ionic solvation free energies}, + annotation = {An \texttt{article} entry with an \texttt{eid} and a \texttt{ + doi} field. Note that the \textsc{doi} is transformed into a + clickable link if \texttt{hyperref} support has been enabled}, + abstract = {The computation of \texttt{ionic} solvation free energies from atomistic + simulations is a surprisingly difficult problem that has found no + satisfactory solution for more than 15 years. The reason is that + the charging free energies evaluated from such simulations are + affected by very large errors. One of these is related to the + choice of a specific convention for summing up the contributions + of solvent charges to the electrostatic potential in the ionic + cavity, namely, on the basis of point charges within entire + solvent molecules (M scheme) or on the basis of individual point + charges (P scheme). The use of an inappropriate convention may + lead to a charge-independent offset in the calculated potential, + which depends on the details of the summation scheme, on the + quadrupole-moment trace of the solvent molecule, and on the + approximate form used to represent electrostatic interactions in + the system. However, whether the M or P scheme (if any) + represents the appropriate convention is still a matter of + on-going debate. The goal of the present article is to settle + this long-standing controversy by carefully analyzing (both + analytically and numerically) the properties of the electrostatic + potential in molecular liquids (and inside cavities within them). 
+ }, +} + +@article{article:sarfraz-razzak_techni=sectio=algori_2002, + title = {Technical section: {An} algorithm for automatic capturing of the + font outlines}, + author = {M. Sarfraz and M. F. A. Razzak}, + year = {2002}, + journal = {Computers and Graphics}, + volume = {26}, + number = {5}, + pages = {795--804}, + issn = {0097-8493}, + annotation = {An \texttt{article} entry with an \texttt{issn} field}, +} + +@article{article:reese_georgi=anglos=diplom_1958, + title = {Georgia in {Anglo-Spanish} Diplomacy, 1736--1739}, + author = {Reese, Trevor R.}, + series = {3}, + volume = {15}, + pages = {168--190}, + journaltitle = {William and Mary Quarterly}, + date = {1958}, + langid = {english}, + langidopts = {variant=american}, + annotation = {An \texttt{article} entry with a \texttt{series} and a \texttt + {volume} field. Note the format of the series. If the value of + the \texttt{series} field is an integer, this number is printed + as an ordinal and the string \enquote*{series} is appended + automatically}, +} + +@article{article:shore_twiceb=once=concei_1991, + title = {Twice-Born, Once Conceived}, + author = {Shore, Bradd}, + series = {newseries}, + volume = {93}, + number = {1}, + pages = {9--27}, + journaltitle = {American Anthropologist}, + date = {1991-03}, + subtitle = {Meaning Construction and Cultural Cognition}, + annotation = {An \texttt{article} entry with \texttt{series}, \texttt{volume + }, and \texttt{number} fields. 
Note the format of the \texttt{ + series} which is a localization key}, +} + +@article{article:sigfridsson-ryde_compar=method=derivi_1998, + title = {Comparison of methods for deriving atomic charges from the + electrostatic potential and moments}, + author = {Sigfridsson, Emma and Ryde, Ulf}, + volume = {19}, + number = {4}, + pages = {377--395}, + doi = {10.1002/(SICI)1096-987X(199803)19:4<377::AID-JCC1>3.0.CO;2-P}, + journaltitle = {Journal of Computational Chemistry}, + date = {1998}, + langid = {english}, + langidopts = {variant=american}, + indextitle = {Methods for deriving atomic charges}, + annotation = {An \texttt{article} entry with \texttt{volume}, \texttt{number + }, and \texttt{doi} fields. Note that the \textsc{doi} is + transformed into a clickable link if \texttt{hyperref} support + has been enabled}, + abstract = {Four methods for deriving partial atomic charges from the + quantum chemical electrostatic potential (CHELP, CHELPG, + Merz-Kollman, and RESP) have been compared and critically + evaluated. It is shown that charges strongly depend on how and + where the potential points are selected. Two alternative methods + are suggested to avoid the arbitrariness in the point-selection + schemes and van der Waals exclusion radii: CHELP-BOW, which also + estimates the charges from the electrostatic potential, but with + potential points that are Boltzmann-weighted after their + occurrence in actual simulations using the energy function of the + program in which the charges will be used, and CHELMO, which + estimates the charges directly from the electrostatic multipole + moments. 
Different criteria for the quality of the charges are + discussed.}, +} + +@article{article:spiegelberg_intent=intent=schola_1969, + title = {\mkbibquote{Intention} und \mkbibquote{Intentionalit{\"a}t} in der + Scholastik, bei Brentano und Husserl}, + shorttitle = {Intention und Intentionalit{\"a}t}, + author = {Spiegelberg, Herbert}, + volume = {29}, + pages = {189--216}, + journaltitle = {Studia Philosophica}, + date = {1969}, + langid = {german}, + sorttitle = {Intention und Intentionalitat in der Scholastik, bei Brentano + und Husserl}, + indexsorttitle = {Intention und Intentionalitat in der Scholastik, bei + Brentano und Husserl}, + annotation = {An \texttt{article} entry. Note the \texttt{sorttitle} and + \texttt{indexsorttitle} fields and the markup of the quotes in + the database file}, +} + +@article{article:springer_mediae=pilgri=routes_1950, + title = {Mediaeval Pilgrim Routes from {Scandinavia} to {Rome}}, + shorttitle = {Mediaeval Pilgrim Routes}, + author = {Springer, Otto}, + volume = {12}, + pages = {92--122}, + journaltitle = {Mediaeval Studies}, + date = {1950}, + langid = {english}, + langidopts = {variant=british}, + annotation = {A plain \texttt{article} entry}, +} + +@article{article:weinberg_model=lepton_1967, + title = {A Model of Leptons}, + author = {Weinberg, Steven}, + volume = {19}, + pages = {1264--1266}, + journaltitle = {Phys.~Rev.~Lett.}, + date = {1967}, +} + +@string{anch-ie = {Angew.~Chem. Int.~Ed.}} + +@string{cup = {Cambridge University Press}} + +@string{dtv = {Deutscher Taschenbuch-Verlag}} + +@string{hup = {Harvard University Press}} + +@string{jams = {J.~Amer. Math. Soc.}} + +@string{jchph = {J.~Chem. Phys.}} + +@string{jomch = {J.~Organomet. 
Chem.}} + +@string{pup = {Princeton University Press}} + +@incollection{incollection:westfahl_true=fronti, + title = {The True Frontier}, + author = {Westfahl, Gary}, + pages = {55--65}, + subtitle = {Confronting and Avoiding the Realities of Space in {American} + Science Fiction Films}, + crossref = {westfahl:frontier}, + langid = {english}, + langidopts = {variant=american}, + indextitle = {True Frontier, The}, + annotation = {A cross-referenced article from a \texttt{collection}. This is + an \texttt{incollection} entry with a \texttt{crossref} field. + Note the \texttt{subtitle} and \texttt{indextitle} fields}, +} + +@article{article:yoon-ryu_pallad=pincer=comple_2006, + title = {Palladium pincer complexes with reduced bond angle strain: + efficient catalysts for the {Heck} reaction}, + author = {Yoon, Myeong S. and Ryu, Dowook and Kim, Jeongryul and Ahn, Kyo + Han}, + volume = {25}, + number = {10}, + pages = {2409--2411}, + journaltitle = {Organometallics}, + date = {2006}, + indextitle = {Palladium pincer complexes}, +} diff --git a/tests/test-config.toml b/tests/test-config.toml index d3e42c5..8dd8014 100644 --- a/tests/test-config.toml +++ b/tests/test-config.toml @@ -61,10 +61,13 @@ custom_column = "series" # year_color = "135" [citekey_formatter] -fields = ["entrytype;;;;:", "author;2;;-;_", "title;3;6;_;_", "year", "publisher;;5;#;" ] +fields = ["shorthand;;;;+","entrytype;;;;:", "author;2;;-;_", "title;3;6;=;_", "year", "publisher;;5;#;" ] # fields = [ # CamelCase test # "author;2;;;", # "title;5;5;;", # "year" # ] case = "lowercase" +ascii_only = true +# ignored_words = ["the"] +# ignored_chars = ["?", "."] -- cgit v1.2.3 From c62b83e02359c24973344699116acc12b4a04108 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Tue, 14 Oct 2025 08:54:35 +0200 Subject: skip set and xdata entries by default --- src/bibiman/citekeys.rs | 7 +++++-- src/bibiman/citekeys/citekey_utils.rs | 5 +++++ src/config.rs | 10 ++++++++++ tests/biblatex-test-citekeys.bib | 4 ++-- 4 
files changed, 22 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 0cec28e..999c6cb 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -28,7 +28,7 @@ use owo_colors::OwoColorize; use serde::{Deserialize, Serialize}; use crate::{ - bibiman::citekeys::citekey_utils::{build_citekey, formatting_help}, + bibiman::citekeys::citekey_utils::{SKIPPED_ENTRIES, build_citekey, formatting_help}, config::{BibiConfig, IGNORED_SPECIAL_CHARS, IGNORED_WORDS}, }; @@ -167,6 +167,10 @@ impl CitekeyFormatting { pub fn do_formatting(&mut self, ignored_chars: &[char], ignored_words: &[String]) -> &mut Self { let mut old_new_keys: Vec<(String, String)> = Vec::new(); for entry in self.bib_entries.iter() { + // Skip specific entries + if SKIPPED_ENTRIES.contains(&entry.entry_type.to_string().to_lowercase().as_str()) { + continue; + } old_new_keys.push(( entry.key.clone(), build_citekey( @@ -181,7 +185,6 @@ impl CitekeyFormatting { } self.old_new_keys_map = old_new_keys; - // dbg!(&self.old_new_keys_map); self } diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs index 5f70224..58a8274 100644 --- a/src/bibiman/citekeys/citekey_utils.rs +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -15,6 +15,8 @@ // along with this program. If not, see . 
///// +use std::sync::LazyLock; + use biblatex::{ChunksExt, Entry, Type}; use deunicode::deunicode; use indoc::formatdoc; @@ -25,6 +27,8 @@ use owo_colors::{ use crate::bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully}; +pub(super) const SKIPPED_ENTRIES: [&str; 2] = ["set", "xdata"]; + pub(super) fn formatting_help() { let help = vec![ formatdoc!( @@ -200,6 +204,7 @@ pub(super) fn build_citekey( match case { Some(CitekeyCase::Lower) => new_citekey.to_lowercase(), Some(CitekeyCase::Upper) => new_citekey.to_uppercase(), + // otherwise skip, since camelcase is processed in char loop _ => new_citekey, } } diff --git a/src/config.rs b/src/config.rs index 7c1a0f8..b8d8b45 100644 --- a/src/config.rs +++ b/src/config.rs @@ -52,6 +52,10 @@ pub static IGNORED_WORDS: LazyLock> = LazyLock::new(|| { String::from("at"), String::from("to"), String::from("and"), + String::from("him"), + String::from("her"), + String::from("his"), + String::from("hers"), String::from("der"), String::from("die"), String::from("das"), @@ -63,6 +67,12 @@ pub static IGNORED_WORDS: LazyLock> = LazyLock::new(|| { String::from("und"), String::from("für"), String::from("vor"), + String::from("er"), + String::from("sie"), + String::from("es"), + String::from("ihm"), + String::from("ihr"), + String::from("ihnen"), ] }); diff --git a/tests/biblatex-test-citekeys.bib b/tests/biblatex-test-citekeys.bib index 9767f97..34c2f33 100644 --- a/tests/biblatex-test-citekeys.bib +++ b/tests/biblatex-test-citekeys.bib @@ -1,9 +1,9 @@ -@set{set, +@set{SET, entryset = {article:herrmann-ofele_carboc=carben=as_2006,article:aksin-turkmen_effect=immobi=on_2006,article:yoon-ryu_pallad=pincer=comple_2006}, annotation = {A \texttt{set} with three members.}, } -@set{set, +@set{stdmodel, entryset = {article:glashow_partia=symmet=weak_1961,article:weinberg_model=lepton_1967,salam}, annotation = {A \texttt{set} with three members discussing the standard model of particle physics.}, -- cgit v1.2.3 From 
9b21727bd151a3fda2133b9da12eec588068130e Mon Sep 17 00:00:00 2001 From: lukeflo Date: Tue, 14 Oct 2025 14:30:56 +0200 Subject: use citekey formatter for adding new entries via doi --- src/bibiman.rs | 105 +++++++++++++----------------------------- src/bibiman/citekeys.rs | 118 ++++++++++++++++++++++++++---------------------- 2 files changed, 97 insertions(+), 126 deletions(-) (limited to 'src') diff --git a/src/bibiman.rs b/src/bibiman.rs index 3158d73..392ae95 100644 --- a/src/bibiman.rs +++ b/src/bibiman.rs @@ -16,22 +16,23 @@ ///// use crate::app::expand_home; +use crate::bibiman::citekeys::CitekeyFormatting; use crate::bibiman::entries::EntryTableColumn; use crate::bibiman::{bibisetup::*, search::BibiSearch}; use crate::cliargs::CLIArgs; use crate::config::BibiConfig; -use crate::tui::popup::{PopupArea, PopupItem, PopupKind}; use crate::tui::Tui; +use crate::tui::popup::{PopupArea, PopupItem, PopupKind}; use crate::{app, cliargs}; use crate::{bibiman::entries::EntryTable, bibiman::keywords::TagList}; use arboard::Clipboard; -use color_eyre::eyre::{Context, Error, Result}; +use biblatex::Bibliography; +use color_eyre::eyre::{Context, Error, Result, eyre}; use crossterm::event::KeyCode; use editor_command::EditorBuilder; use ratatui::widgets::ScrollbarState; -use regex::Regex; use std::ffi::OsStr; -use std::fs::{self, read_to_string}; +use std::fs::{self}; use std::fs::{File, OpenOptions}; use std::io::Write; use std::path::PathBuf; @@ -190,7 +191,9 @@ impl Bibiman { self.popup_area.popup_message = message.unwrap().to_owned(); Ok(()) } else { - Err(Error::msg("You need to past at least a message via Some(&str) to create a message popup")) + Err(Error::msg( + "You need to past at least a message via Some(&str) to create a message popup", + )) } } PopupKind::MessageError => { @@ -202,7 +205,9 @@ impl Bibiman { self.popup_area.popup_message = message.unwrap().to_owned(); Ok(()) } else { - Err(Error::msg("You need to past at least a message via Some(&str) to create a 
message popup")) + Err(Error::msg( + "You need to past at least a message via Some(&str) to create a message popup", + )) } } PopupKind::OpenRes => { @@ -680,23 +685,32 @@ impl Bibiman { // Index of selected popup field let popup_idx = self.popup_area.popup_state.selected().unwrap(); - // regex pattern to match citekey in fetched bibtexstring - let pattern = Regex::new(r"\{([^\{\},]*),").unwrap(); + let new_bib_entry = Bibliography::parse(&self.popup_area.popup_sel_item) + .map_err(|e| eyre!("Couldn't parse downloaded bib entry: {}", e.to_string()))?; - let citekey = pattern - .captures(&self.popup_area.popup_sel_item) - .unwrap() - .get(1) - .unwrap() - .as_str() - .to_string(); + let formatted_struct = + if let Some(formatter) = CitekeyFormatting::new(cfg, new_bib_entry.clone()) { + Some(formatter.do_formatting()) + } else { + None + }; + + let (new_citekey, entry_string) = if let Some(mut formatter) = formatted_struct { + ( + formatter.get_citekey_pair(0).unwrap().1, + formatter.print_updated_bib_as_string(), + ) + } else { + let keys = new_bib_entry.keys().collect::>(); + (keys[0].to_string(), new_bib_entry.to_biblatex_string()) + }; // Check if new file or existing file was choosen let mut file = if self.popup_area.popup_list[popup_idx] .0 .contains("Create new file") { - let citekey = PathBuf::from(&citekey); + let citekey = PathBuf::from(&new_citekey); // Get path of current files let path: PathBuf = if self.main_bibfiles[0].is_file() { self.main_bibfiles[0].parent().unwrap().to_owned() @@ -714,45 +728,18 @@ impl Bibiman { } else { let file_path = &self.main_bibfiles[popup_idx - 1]; - // Check if similar citekey already exists - let file_string = read_to_string(&file_path).unwrap(); - - // If choosen file contains entry with fetched citekey, append an - // char to the citekey so no dublettes are created - if file_string.contains(&citekey) { - let mut new_citekey = String::new(); - - // Loop over ASCII alpabetic chars and check again if citekey with - // 
appended char exists. If yes, move to next char and test again. - // If the citekey is free, use it and break the loop - for c in b'a'..=b'z' { - let append_char = (c as char).to_string(); - new_citekey = citekey.clone() + &append_char; - if !file_string.contains(&new_citekey) { - break; - } - } - - let new_entry_string_clone = self.popup_area.popup_sel_item.clone(); - - // Replace the double citekey with newly created - self.popup_area.popup_sel_item = pattern - .replace(&new_entry_string_clone, format!("{{{},", &new_citekey)) - .to_string(); - } - OpenOptions::new().append(true).open(file_path).unwrap() }; // Optionally, add a newline before the content file.write_all(b"\n")?; // Write content to file - file.write_all(self.popup_area.popup_sel_item.as_bytes())?; + file.write_all(entry_string.as_bytes())?; // Update the database and the lists to reflect the new content self.update_lists(cfg); self.close_popup(); // Select newly created entry - self.select_entry_by_citekey(&citekey); + self.select_entry_by_citekey(&new_citekey); Ok(()) } @@ -1285,38 +1272,10 @@ impl Bibiman { #[cfg(test)] mod tests { - use regex::Captures; - - use super::*; - #[test] fn citekey_pattern() { let citekey = format!("{{{},", "a_key_2001"); assert_eq!(citekey, "{a_key_2001,") } - - #[test] - fn regex_capture_citekey() { - let re = Regex::new(r"\{([^\{\},]*),").unwrap(); - - let bibstring = String::from("@article{citekey77_2001:!?, author = {Hanks, Tom}, title = {A great book}, year = {2001}}"); - - let citekey = re.captures(&bibstring).unwrap().get(1).unwrap().as_str(); - - assert_eq!(citekey, "citekey77_2001:!?"); - - if bibstring.contains(&citekey) { - let append_char = "a"; - let new_entry_string_clone = bibstring.clone(); - - let updated_bibstring = re - .replace(&new_entry_string_clone, |caps: &Captures| { - format!("{{{}{},", &caps[1], &append_char) - }) - .to_string(); - - assert_eq!(updated_bibstring, "@article{citekey77_2001:!?a, author = {Hanks, Tom}, title = {A great book}, 
year = {2001}}") - } - } } diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 999c6cb..4516b28 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -51,24 +51,27 @@ pub enum CitekeyCase { } #[derive(Debug, Default, Clone)] -pub(crate) struct CitekeyFormatting { +pub(crate) struct CitekeyFormatting<'a> { /// bibfile to replace keys at. The optional fields defines a differing /// output file to write to, otherwise original file will be overwritten. - bibfile_path: (PathBuf, Option), bib_entries: Bibliography, fields: Vec, case: Option, old_new_keys_map: Vec<(String, String)>, dry_run: bool, ascii_only: bool, + ignored_chars: &'a [char], + ignored_words: &'a [String], } -impl CitekeyFormatting { +impl<'a> CitekeyFormatting<'a> { pub(crate) fn parse_citekey_cli( parser: &mut lexopt::Parser, cfg: &BibiConfig, ) -> color_eyre::Result<()> { let mut formatter = CitekeyFormatting::default(); + let mut source_file = PathBuf::new(); + let mut target_file: Option = None; formatter.fields = cfg.citekey_formatter.fields.clone().ok_or_eyre(format!( "Need to define {} correctly in config file", @@ -93,78 +96,73 @@ impl CitekeyFormatting { } Short('d') | Long("dry-run") => formatter.dry_run = true, Short('s') | Short('f') | Long("source") | Long("file") => { - formatter.bibfile_path.0 = parser.value()?.into() + source_file = parser.value()?.into() } Short('t') | Short('o') | Long("target") | Long("output") => { - formatter.bibfile_path.1 = Some(parser.value()?.into()) + target_file = Some(parser.value()?.into()) } _ => return Err(arg.unexpected().into()), } } - let bibstring = std::fs::read_to_string(&formatter.bibfile_path.0)?; + let bibstring = std::fs::read_to_string(&source_file)?; formatter.bib_entries = Bibliography::parse(&bibstring) .map_err(|e| eyre!("Couldn't parse bibfile due to {}", e.kind))?; - let ignored_chars = if let Some(chars) = &cfg.citekey_formatter.ignored_chars { + formatter.ignored_chars = if let Some(chars) = 
&cfg.citekey_formatter.ignored_chars { chars.as_slice() } else { IGNORED_SPECIAL_CHARS.as_slice() }; - let ignored_words = if let Some(words) = &cfg.citekey_formatter.ignored_words { + formatter.ignored_words = if let Some(words) = &cfg.citekey_formatter.ignored_words { words.as_slice() } else { &*IGNORED_WORDS.as_slice() }; formatter - .do_formatting(ignored_chars, ignored_words) + .do_formatting() .rev_sort_new_keys_by_len() - .update_file()?; + .update_file(source_file, target_file)?; Ok(()) } /// Start Citekey formatting with building a new instance of `CitekeyFormatting` - /// Formatting is processed file by file, because `bibman` can handle - /// multi-file setups. - /// The `Bibliography` inserted will be edited in place with the new citekeys. - /// Thus, in the end the `bib_entries` field will hold the updated `Bibliography` - pub fn new>( - cfg: &BibiConfig, - path: P, - target: Option

, - bib_entries: Bibliography, - ) -> color_eyre::Result { - let fields = cfg - .citekey_formatter - .fields - .clone() - .expect("Need to define fields in config to format citekeys"); + pub fn new(cfg: &'a BibiConfig, bib_entries: Bibliography) -> Option { + let fields = cfg.citekey_formatter.fields.clone().unwrap_or(Vec::new()); if fields.is_empty() { - return Err(eyre!( - "To format all citekeys, you need to provide {} values in the config file", - "fields".bold() - )); + return None; } - Ok(Self { - bibfile_path: ( - path.as_ref().to_path_buf(), - target.map(|p| p.as_ref().to_path_buf()), - ), + let ignored_chars = if let Some(chars) = &cfg.citekey_formatter.ignored_chars { + chars.as_slice() + } else { + IGNORED_SPECIAL_CHARS.as_slice() + }; + + let ignored_words = if let Some(words) = &cfg.citekey_formatter.ignored_words { + words.as_slice() + } else { + &*IGNORED_WORDS.as_slice() + }; + + Some(Self { bib_entries, fields, case: cfg.citekey_formatter.case.clone(), old_new_keys_map: Vec::new(), dry_run: false, ascii_only: cfg.citekey_formatter.ascii_only, + ignored_chars, + ignored_words, }) } - /// Process the actual formatting. The citekey of every entry will be updated. - pub fn do_formatting(&mut self, ignored_chars: &[char], ignored_words: &[String]) -> &mut Self { + /// Process the actual formatting. Updated citekeys will be stored in a the + /// `self.old_new_keys_map` vector consisting of pairs (old key, new key). 
+ pub fn do_formatting(mut self) -> Self { let mut old_new_keys: Vec<(String, String)> = Vec::new(); for entry in self.bib_entries.iter() { // Skip specific entries @@ -178,8 +176,8 @@ impl CitekeyFormatting { &self.fields, self.case.as_ref(), self.ascii_only, - ignored_chars, - ignored_words, + self.ignored_chars, + self.ignored_words, ), )); } @@ -189,8 +187,12 @@ impl CitekeyFormatting { self } - /// Write entries with updated citekeys to bibfile - pub fn update_file(&mut self) -> color_eyre::Result<()> { + /// Write formatted citekeys to bibfile replacing the old keys in all fields + pub fn update_file>( + &mut self, + source_file: P, + target_file: Option

, + ) -> color_eyre::Result<()> { if self.dry_run { println!("Following citekeys would be formatted: old => new\n"); self.old_new_keys_map.sort_by(|a, b| a.0.cmp(&b.0)); @@ -198,11 +200,10 @@ impl CitekeyFormatting { println!("{} => {}", old.italic(), new.bold()) } } else { - let source_file = self.bibfile_path.0.as_path(); - let target_file = if let Some(path) = &self.bibfile_path.1 { - path + let target_file = if let Some(path) = target_file { + path.as_ref().to_path_buf() } else { - source_file + source_file.as_ref().to_path_buf() }; let mut content = std::fs::read_to_string(source_file)?; @@ -228,23 +229,34 @@ impl CitekeyFormatting { /// You are **very encouraged** to call this method before `update_file()` to /// prevent replacing citekeys partly which afterwards wont match the pattern /// anymore. - pub fn rev_sort_new_keys_by_len(&mut self) -> &mut Self { + pub fn rev_sort_new_keys_by_len(mut self) -> Self { self.old_new_keys_map .sort_by(|a, b| b.0.len().cmp(&a.0.len())); self } + + /// Update the `Bibliography` of the `CitekeyFormatting` struct and return + /// it as `String`. 
+ pub fn print_updated_bib_as_string(&mut self) -> String { + let mut content = self.bib_entries.to_biblatex_string(); + for (old_key, new_key) in self.old_new_keys_map.iter() { + content = content.replace(old_key, new_key); + } + content + } + + pub fn get_citekey_pair(&self, idx: usize) -> Option<(String, String)> { + self.old_new_keys_map.get(idx).map(|pair| pair.to_owned()) + } } #[cfg(test)] mod tests { - use std::path::PathBuf; - - use biblatex::Bibliography; - use crate::{ bibiman::citekeys::{CitekeyCase, CitekeyFormatting}, config::{IGNORED_SPECIAL_CHARS, IGNORED_WORDS}, }; + use biblatex::Bibliography; #[test] fn format_citekey_test() { @@ -270,8 +282,7 @@ mod tests { } "; let bibliography = Bibliography::parse(src).unwrap(); - let mut formatting_struct = CitekeyFormatting { - bibfile_path: (PathBuf::new(), None), + let formatting_struct = CitekeyFormatting { bib_entries: bibliography, fields: vec![ "entrytype;;;;:".into(), @@ -284,9 +295,10 @@ mod tests { old_new_keys_map: Vec::new(), dry_run: false, ascii_only: true, + ignored_chars: &IGNORED_SPECIAL_CHARS, + ignored_words: &IGNORED_WORDS, }; - let _ = formatting_struct - .do_formatting(IGNORED_SPECIAL_CHARS.as_slice(), &*IGNORED_WORDS.as_slice()); + let formatting_struct = formatting_struct.do_formatting(); assert_eq!( formatting_struct.old_new_keys_map.get(0).unwrap().1, "article:bos-mccurley_lat=met=pub=wor_2023" -- cgit v1.2.3 From 3cd41cb1bc2046f1710175999305ab08508bae69 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Tue, 14 Oct 2025 16:28:20 +0200 Subject: option to reformat citekey-based basenames of attachment files --- CITEKEYS.md | 8 +- src/bibiman/bibisetup.rs | 98 +++---- src/bibiman/citekeys.rs | 85 +++++- src/bibiman/citekeys/citekey_utils.rs | 17 +- tests/biblatex-test-citekeys.bib | 476 ---------------------------------- 5 files changed, 152 insertions(+), 532 deletions(-) delete mode 100644 tests/biblatex-test-citekeys.bib (limited to 'src') diff --git a/CITEKEYS.md b/CITEKEYS.md index 
19bd497..828e557 100644 --- a/CITEKEYS.md +++ b/CITEKEYS.md @@ -147,11 +147,17 @@ should be built. - Most importantly: *always use the **`--dry-run`** option first*! This will print a list of old and new values for all citekeys in the file without changing anything. For the test file of this repo and using the pattern from - the [section below](#examples) `----dry-run` produces the following output: + the [section below](#examples) `--dry-run` produces the following output: [![niri-screenshot-2025-10-14-10-11-06.png](https://i.postimg.cc/SxxRkY8K/niri-screenshot-2025-10-14-10-11-06.png)](https://postimg.cc/bs4pRJmX) - After finding a good overall pattern, *use the `--output=` option* to create a new file and don't overwrite your existing file. Thus, your original file isn't broken if the key formatter produces some unwanted output. +- Its possible to update citekey based PDF and note files directly when + formatting the citekeys using the `-u`/`--update-attachments` option. Thus, + all PDFs and notes are already linked to the correct entries after updating + the citekeys. Since this operation can break things, use it with `--dry-run` + first. As with regular citekeys this will print all changes without processing + anything. - Even very long patterns are possible, they are not encouraged, since it bloats the bibfiles. 
- The same accounts for *too short* patterns; if the pattern is to unspecific, diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs index a83a507..a817236 100644 --- a/src/bibiman/bibisetup.rs +++ b/src/bibiman/bibisetup.rs @@ -22,6 +22,7 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::ffi::{OsStr, OsString}; +use std::path::Path; use std::{fs, path::PathBuf}; use walkdir::WalkDir; @@ -318,23 +319,27 @@ impl BibiSetup { cfg: &BibiConfig, ) -> Vec { let mut pdf_files = if cfg.general.pdf_path.is_some() { - collect_file_paths(cfg.general.pdf_path.as_ref().unwrap(), &Some(vec!["pdf"])) + collect_file_paths( + cfg.general.pdf_path.as_ref().unwrap(), + Some(vec!["pdf".into()].as_slice()), + ) + } else { + None + }; + let ext = if let Some(ext) = &cfg.general.note_extensions + && cfg.general.note_path.is_some() + { + // let mut ext: Vec<&str> = Vec::new(); + // for e in cfg.general.note_extensions.as_ref().unwrap().iter() { + // ext.push(e); + // } + Some(ext.as_slice()) } else { None }; - let ext: Option> = - if cfg.general.note_path.is_some() && cfg.general.note_extensions.is_some() { - let mut ext: Vec<&str> = Vec::new(); - for e in cfg.general.note_extensions.as_ref().unwrap().iter() { - ext.push(e); - } - Some(ext) - } else { - None - }; let mut note_files = if cfg.general.note_path.is_some() && cfg.general.note_extensions.is_some() { - collect_file_paths(cfg.general.note_path.as_ref().unwrap(), &ext) + collect_file_paths(cfg.general.note_path.as_ref().unwrap(), ext.clone()) } else { None }; @@ -369,7 +374,7 @@ impl BibiSetup { file_field: filepaths.1, subtitle: Self::get_subtitle(k, bibliography), notes: if note_files.is_some() { - Self::get_notepath(k, &mut note_files, &ext) + Self::get_notepath(k, &mut note_files, ext) } else { None }, @@ -575,18 +580,18 @@ impl BibiSetup { ) -> (Option>, bool) { if biblio.get(citekey).unwrap().file().is_ok() { ( - Some(vec![biblio - .get(citekey) - .unwrap() - 
.file() - .unwrap() - .trim() - .into()]), + Some(vec![ + biblio.get(citekey).unwrap().file().unwrap().trim().into(), + ]), true, ) } else if pdf_files.is_some() { ( - Self::merge_filepath_or_none_two(&citekey, pdf_files, vec!["pdf"]), + Self::merge_filepath_or_none_two( + &citekey, + pdf_files, + vec!["pdf".into()].as_slice(), + ), false, ) } else { @@ -597,10 +602,10 @@ impl BibiSetup { pub fn get_notepath( citekey: &str, note_files: &mut Option>>, - ext: &Option>, + ext: Option<&[String]>, ) -> Option> { if let Some(e) = ext { - Self::merge_filepath_or_none_two(citekey, note_files, e.to_vec()) + Self::merge_filepath_or_none_two(citekey, note_files, e) } else { None } @@ -627,7 +632,7 @@ impl BibiSetup { fn merge_filepath_or_none_two( citekey: &str, files: &mut Option>>, - extensions: Vec<&str>, + extensions: &[String], ) -> Option> { let mut file = Vec::new(); @@ -645,11 +650,7 @@ impl BibiSetup { } } - if file.is_empty() { - None - } else { - Some(file) - } + if file.is_empty() { None } else { Some(file) } } } @@ -663,15 +664,17 @@ impl BibiSetup { /// /// Passing [`None`] as argument for extensions will result in collecting all files /// from the given directory and its subdirectories! 
-pub fn collect_file_paths( - file_dir: &PathBuf, - extensions: &Option>, +pub fn collect_file_paths>( + file_dir: P, + extensions: Option<&[String]>, ) -> Option>> { let mut files: HashMap> = HashMap::new(); + let file_dir = file_dir.as_ref(); + // Expand tilde to /home/user let file_dir = if file_dir.starts_with("~") { - &app::expand_home(&file_dir) + &app::expand_home(&file_dir.to_path_buf()) } else { file_dir }; @@ -682,13 +685,13 @@ pub fn collect_file_paths( let f = file.unwrap().into_path(); if f.is_file() && f.extension().is_some() - && extensions.as_ref().is_some_and(|v| { + && extensions.is_some_and(|v| { v.contains( &f.extension() .unwrap_or_default() .to_ascii_lowercase() - .to_str() - .unwrap_or_default(), + .to_string_lossy() + .to_string(), ) }) { @@ -721,11 +724,7 @@ pub fn collect_file_paths( } } - if files.is_empty() { - None - } else { - Some(files) - } + if files.is_empty() { None } else { Some(files) } } #[cfg(test)] @@ -759,8 +758,11 @@ mod tests { ], ); - let matches = - BibiSetup::merge_filepath_or_none_two("citekey", &mut Some(files), vec!["md", "pdf"]); + let matches = BibiSetup::merge_filepath_or_none_two( + "citekey", + &mut Some(files), + vec!["md".into(), "pdf".into()].as_slice(), + ); assert_eq!( matches.clone().unwrap().iter().next().unwrap().to_owned(), @@ -770,9 +772,11 @@ mod tests { matches.clone().unwrap().last().unwrap().to_owned(), OsString::from("/one/other/citekey.pdf") ); - assert!(!matches - .clone() - .unwrap() - .contains(&OsString::from("/one/other/citekey2.pdf"))); + assert!( + !matches + .clone() + .unwrap() + .contains(&OsString::from("/one/other/citekey2.pdf")) + ); } } diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 4516b28..8f70ab0 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -16,6 +16,7 @@ ///// use std::{ + ffi::OsStr, fs::OpenOptions, io::Write, path::{Path, PathBuf}, @@ -28,7 +29,10 @@ use owo_colors::OwoColorize; use serde::{Deserialize, Serialize}; use crate::{ - 
bibiman::citekeys::citekey_utils::{SKIPPED_ENTRIES, build_citekey, formatting_help}, + bibiman::{ + bibisetup::collect_file_paths, + citekeys::citekey_utils::{SKIPPED_ENTRIES, build_citekey, formatting_help}, + }, config::{BibiConfig, IGNORED_SPECIAL_CHARS, IGNORED_WORDS}, }; @@ -72,6 +76,7 @@ impl<'a> CitekeyFormatting<'a> { let mut formatter = CitekeyFormatting::default(); let mut source_file = PathBuf::new(); let mut target_file: Option = None; + let mut update_files = false; formatter.fields = cfg.citekey_formatter.fields.clone().ok_or_eyre(format!( "Need to define {} correctly in config file", @@ -101,6 +106,7 @@ impl<'a> CitekeyFormatting<'a> { Short('t') | Short('o') | Long("target") | Long("output") => { target_file = Some(parser.value()?.into()) } + Short('u') | Long("update-attachments") => update_files = true, _ => return Err(arg.unexpected().into()), } } @@ -122,10 +128,13 @@ impl<'a> CitekeyFormatting<'a> { &*IGNORED_WORDS.as_slice() }; - formatter - .do_formatting() - .rev_sort_new_keys_by_len() - .update_file(source_file, target_file)?; + let mut updated_formatter = formatter.do_formatting().rev_sort_new_keys_by_len(); + + updated_formatter.update_file(source_file, target_file)?; + + if update_files { + updated_formatter.update_notes_pdfs(cfg)?; + } Ok(()) } @@ -194,7 +203,13 @@ impl<'a> CitekeyFormatting<'a> { target_file: Option

, ) -> color_eyre::Result<()> { if self.dry_run { - println!("Following citekeys would be formatted: old => new\n"); + println!( + "{}\n", + "Following citekeys would be formatted: old => new" + .bold() + .underline() + .white() + ); self.old_new_keys_map.sort_by(|a, b| a.0.cmp(&b.0)); for (old, new) in &self.old_new_keys_map { println!("{} => {}", old.italic(), new.bold()) @@ -235,6 +250,64 @@ impl<'a> CitekeyFormatting<'a> { self } + pub fn update_notes_pdfs(&self, cfg: &BibiConfig) -> color_eyre::Result<()> { + if let Some(pdf_path) = &cfg.general.pdf_path { + self.update_files_by_citekey_basename(pdf_path, vec!["pdf".into()].as_slice())?; + } + if let Some(note_path) = &cfg.general.note_path + && let Some(ext) = &cfg.general.note_extensions + { + self.update_files_by_citekey_basename(note_path, ext.as_slice())?; + } + Ok(()) + } + + fn update_files_by_citekey_basename>( + &self, + path: P, + ext: &[String], + ) -> color_eyre::Result<()> { + let files = collect_file_paths(path.as_ref(), Some(ext)); + if self.dry_run { + println!( + "\n{}\n", + "Following paths would be updated:" + .underline() + .bold() + .white() + ) + } + if let Some(mut f) = files { + for (old_key, new_key) in self.old_new_keys_map.iter() { + for e in ext { + let old_basename = old_key.to_owned() + "." + e; + if let Some(item) = f.get_mut(&old_basename) { + for p in item { + let ext = p.extension(); + let basename = new_key.to_owned() + + "." + + ext.unwrap_or(OsStr::new("")).to_str().unwrap_or(""); + let new_name = p + .parent() + .expect("parent expected") + .join(Path::new(&basename)); + if !self.dry_run { + std::fs::rename(p, new_name)?; + } else { + println!( + "{} => {}", + p.display().to_string().italic().dimmed(), + new_name.display().to_string().bold() + ) + } + } + } + } + } + } + Ok(()) + } + /// Update the `Bibliography` of the `CitekeyFormatting` struct and return /// it as `String`. 
pub fn print_updated_bib_as_string(&mut self) -> String { diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs index 58a8274..61a1804 100644 --- a/src/bibiman/citekeys/citekey_utils.rs +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -79,7 +79,7 @@ pub(super) fn formatting_help() { \t{} \tThe bibfile for which the citekey formatting should be processed. \tTakes a path as argument. - ", "-s, -f, --source=, --file=".fg::().bold()}, + ", "-s, -f, --source=, --file=".fg::().bold()}, formatdoc!( " \t{} @@ -88,9 +88,22 @@ pub(super) fn formatting_help() { \tcreated. \tIf the argument isn't used, the original file will be {}! ", - "-t, -o, --target=, --output=".fg::().bold(), + "-t, -o, --target=, --output=" + .fg::() + .bold(), "overwritten".italic(), ), + formatdoc!( + " + \t{} + \tWhen formatting citekeys also rename all PDFs and notefiles + \tfollowing the bibiman citekey-basename scheme at the locations + \tset in the config file. This option can break file paths. Try + \twith {} first! 
+ ", + "-u, --update-attachments".fg::().bold(), + "--dry-run".bold() + ), ]; let help = help.join("\n"); println!("{}", help); diff --git a/tests/biblatex-test-citekeys.bib b/tests/biblatex-test-citekeys.bib deleted file mode 100644 index 34c2f33..0000000 --- a/tests/biblatex-test-citekeys.bib +++ /dev/null @@ -1,476 +0,0 @@ -@set{SET, - entryset = {article:herrmann-ofele_carboc=carben=as_2006,article:aksin-turkmen_effect=immobi=on_2006,article:yoon-ryu_pallad=pincer=comple_2006}, - annotation = {A \texttt{set} with three members.}, -} - -@set{stdmodel, - entryset = {article:glashow_partia=symmet=weak_1961,article:weinberg_model=lepton_1967,salam}, - annotation = {A \texttt{set} with three members discussing the standard - model of particle physics.}, -} - -@collection{collection:matuz-miller_contem=litera=critic_1990gale, - title = {Contemporary Literary Criticism}, - year = {1990}, - location = {Detroit}, - publisher = {Gale}, - volume = {61}, - pages = {204--208}, - editor = {Matuz, Roger and Miller, Helen}, - keywords = {narration}, - langid = {english}, - langidopts = {variant=american}, - annotation = {A \texttt{collection} entry providing the excerpt information - for the \texttt{article:doody_heming=style=jakes_1974} entry. 
Note the format of the \texttt{ - pages} field}, -} - -@article{article:aksin-turkmen_effect=immobi=on_2006, - title = {Effect of immobilization on catalytic characteristics of saturated - {Pd-N}-heterocyclic carbenes in {Mizoroki-Heck} reactions}, - author = {Aks{\i}n, {\"O}zge and T{\"u}rkmen, Hayati and Artok , Levent and - { \c{C}}etinkaya, Bekir and Ni, Chaoying and B{\" u}y{ \"u}kg{\"u} - ng{ \" o}r, Orhan and {\"O}zkal, Erhan}, - volume = {691}, - number = {13}, - pages = {3027--3036}, - journaltitle = jomch, - date = {2006}, - indextitle = {Effect of immobilization on catalytic characteristics}, -} - -@article{article:angenendt_honore=salvat=vom_2002, - title = {In Honore Salvatoris~-- Vom Sinn und Unsinn der Patrozinienkunde}, - shorttitle = {In Honore Salvatoris}, - author = {Angenendt, Arnold}, - volume = {97}, - pages = {431--456, 791--823}, - journaltitle = {Revue d'Histoire Eccl{\'e}siastique}, - date = {2002}, - langid = {german}, - indextitle = {In Honore Salvatoris}, - annotation = {A German article in a French journal. Apart from that, a - typical \texttt{article} entry. Note the \texttt{indextitle} - field}, -} - -@book{book:aristotle_de=anima_1907cambr#unive#press, - title = {De Anima}, - author = {Aristotle}, - location = {Cambridge}, - publisher = cup, - date = {1907}, - editor = {Hicks, Robert Drew}, - keywords = {primary, ancient, philosophy, athens}, - langid = {english}, - langidopts = {variant=british}, - annotation = {A \texttt{book} entry with an \texttt{author} and an \texttt{ - editor}}, -} - -@book{book:aristotle_physic_1929g#p#putna, - title = {Physics}, - shorttitle = {Physics}, - author = {Aristotle}, - location = {New York}, - publisher = {G. P. Putnam}, - url = {https://www.infobooks.org/authors/classic/aristotle-books/#Physic}, - date = {1929}, - translator = {Wicksteed, P. H. and Cornford, F. 
M.}, - keywords = {primary, ancient, philosophy}, - langid = {english}, - langidopts = {variant=american}, - file = {~/Documents/coding/projects/bibiman/tests/book:aristotle_physic_1929g#p#putna.pdf}, - annotation = {A \texttt{book} entry with a \texttt{translator} field}, - abstract = {The Physics is a work by Aristotle dedicated to the study of - nature. Regarded by Heidegger as "the fundamental work of Western - philosophy", it presents the renowned distinction between the - four types of cause, as well as reflections on chance, motion, - infinity, and other fundamental concepts. It is here that - Aristotle sets out his celebrated paradox of time.}, -} - -@book{book:aristotle_poetic_1968clare#press, - title = {Poetics}, - shorttitle = {Poetics}, - author = {Aristotle}, - location = {Oxford}, - publisher = {Clarendon Press}, - series = {Clarendon {Aristotle}}, - date = {1968}, - editor = {Lucas, D. W.}, - keywords = {primary}, - langid = {english}, - langidopts = {variant=british}, - annotation = {A \texttt{book} entry with an \texttt{author} and an \texttt{ - editor} as well as a \texttt{series} field}, -} - -@mvbook{mvbook:aristotle_rhetor=aristo=with_1877cambr#unive#press, - title = {The \textbf{Rhetoric} of {Aristotle} with a commentary by the late {Edward - Meredith Cope}}, - shorttitle = {Rhetoric}, - author = {Aristotle}, - publisher = cup, - date = {1877}, - editor = {Cope, Edward Meredith}, - commentator = {Cope, Edward Meredith}, - volumes = {3}, - keywords = {primary}, - langid = {english}, - langidopts = {variant=british}, - sorttitle = {Rhetoric of Aristotle}, - indextitle = {Rhetoric of {Aristotle}, The}, - annotation = {A commented edition. 
Note the concatenation of the \texttt{ - editor} and \texttt{commentator} fields as well as the \texttt{ - volumes}, \texttt{sorttitle}, and \texttt{indextitle} fields}, -} - -@book{book:augustine_hetero=cataly=synthe_1995marce#dekke, - title = {Heterogeneous catalysis for the synthetic \textit{chemist}}, - shorttitle = {Heterogeneous catalysis}, - author = {Augustine, Robert L.}, - location = {New York}, - publisher = {Marcel Dekker}, - date = {1995}, - langid = {english}, - langidopts = {variant=american}, - annotation = {A plain \texttt{book} entry}, - keywords = {chemistry}, -} - -@book{book:averroes_epistl=on=possib_1982jewis#theol#semin#ameri, - title = {The Epistle on the Possibility of Conjunction with the Active - Intellect by {Ibn Rushd} with the Commentary of {Moses Narboni}}, - shorttitle = {Possibility of Conjunction}, - author = {Averroes}, - location = {New York}, - publisher = {Jewish Theological Seminary of America}, - series = {Moreshet: Studies in {Jewish} History, Literature and Thought}, - number = {7}, - date = {1982}, - editor = {Bland, Kalman P.}, - translator = {Bland, Kalman P.}, - keywords = {primary}, - langid = {english}, - langidopts = {variant=american}, - indextitle = {Epistle on the Possibility of Conjunction, The}, - annotation = {A \texttt{book} entry with a \texttt{series} and a \texttt{ - number}. Note the concatenation of the \texttt{editor} and - \texttt{translator} fields as well as the \texttt{indextitle} - field}, -} - -@article{article:baez-lauda_higher=algebr=v_2004, - title = {Higher-Dimensional Algebra {V}: 2-Groups}, - author = {Baez, John C. and Lauda, Aaron D.}, - volume = {12}, - pages = {423--491}, - journaltitle = {Theory and Applications of Categories}, - date = {2004}, - version = {3}, - eprint = {math/0307200v3}, - eprinttype = {arxiv}, - langid = {english}, - keywords = {math}, - langidopts = {variant=american}, - annotation = {An \texttt{article} with \texttt{eprint} and \texttt{ - eprinttype} fields. 
Note that the arXiv reference is - transformed into a clickable link if \texttt{hyperref} support - has been enabled. Compare \texttt{baez\slash online}, which is - the same item given as an \texttt{online} entry}, -} - -@article{article:bertram-wentworth_gromov=invari=holomo_1996, - title = {Gromov invariants for holomorphic maps on {Riemann} surfaces}, - shorttitle = {Gromov invariants}, - author = {Bertram, Aaron and Wentworth, Richard}, - volume = {9}, - number = {2}, - pages = {529--571}, - journaltitle = jams, - date = {1996}, - langid = {english}, - langidopts = {variant=american}, - annotation = {An \texttt{article} entry with a \texttt{volume} and a \texttt - {number} field}, -} - -@article{article:doody_heming=style=jakes_1974, - title = {Hemingway's Style and {Jake's} Narration}, - author = {Doody, Terrence}, - year = {1974}, - journal = {The Journal of Narrative Technique}, - volume = {4}, - number = {3}, - pages = {212--225}, - langid = {english}, - langidopts = {variant=american}, - related = {matuz:article:doody_heming=style=jakes_1974}, - relatedstring = {\autocap{e}xcerpt in}, - annotation = {An \texttt{article} entry cited as an excerpt from a \texttt{ - collection} entry. Note the format of the \texttt{related} and - \texttt{relatedstring} fields}, -} - -@article{article:gillies_herder=prepar=goethe_1933, - title = {Herder and the Preparation of {Goethe's} Idea of World Literature}, - author = {Gillies, Alexander}, - series = {newseries}, - volume = {9}, - pages = {46--67}, - journaltitle = {Publications of the English Goethe Society}, - date = {1933}, - langid = {english}, - langidopts = {variant=british}, - annotation = {An \texttt{article} entry with a \texttt{series} and a \texttt - {volume} field. 
Note that format of the \texttt{series} field - in the database file}, -} - -@article{article:glashow_partia=symmet=weak_1961, - title = {Partial Symmetries of Weak Interactions}, - author = {Glashow, Sheldon}, - volume = {22}, - pages = {579--588}, - journaltitle = {Nucl.~Phys.}, - date = {1961}, -} - -@article{article:herrmann-ofele_carboc=carben=as_2006, - title = {A carbocyclic carbene as an efficient catalyst ligand for {C--C} - coupling reactions}, - author = {Herrmann, Wolfgang A. and {\"O}fele, Karl and Schneider, Sabine K. - and Herdtweck, Eberhardt and Hoffmann, Stephan D.}, - volume = {45}, - number = {23}, - pages = {3859--3862}, - journaltitle = anch-ie, - date = {2006}, - indextitle = {Carbocyclic carbene as an efficient catalyst, A}, -} - -@article{article:hostetler-wingate_alkane=gold=cluste_1998, - title = {Alkanethiolate gold cluster molecules with core diameters from 1.5 - to 5.2~{nm}}, - shorttitle = {Alkanethiolate gold cluster molecules}, - author = {Hostetler, Michael J. and Wingate, Julia E. and Zhong, Chuan-Jian - and Harris, Jay E. and Vachet, Richard W. and Clark, Michael R. and - Londono, J. David and Green, Stephen J. and Stokes, Jennifer J. and - Wignall, George D. and Glish, Gary L. and Porter, Marc D. and Evans - , Neal D. and Murray, Royce W.}, - volume = {14}, - number = {1}, - pages = {17--30}, - journaltitle = {Langmuir}, - date = {1998}, - subtitle = {Core and monolayer properties as a function of core size}, - langid = {english}, - langidopts = {variant=american}, - indextitle = {Alkanethiolate gold cluster molecules}, - annotation = {An \texttt{article} entry with \arabic{author} authors. By - default, long author and editor lists are automatically - truncated. This is configurable}, -} - -@article{article:kastenholz-hunenberger_comput=method=ionic_2006, - title = {Computation of methodology\hyphen independent ionic solvation free - energies from molecular simulations}, - author = {Kastenholz, M. A. 
and H{\"u}nenberger, Philippe H.}, - volume = {124}, - doi = {10.1063/1.2172593}, - journaltitle = jchph, - date = {2006}, - subtitle = {{I}. {The} electrostatic potential in molecular liquids}, - eid = {124106}, - langid = {english}, - langidopts = {variant=american}, - indextitle = {Computation of ionic solvation free energies}, - annotation = {An \texttt{article} entry with an \texttt{eid} and a \texttt{ - doi} field. Note that the \textsc{doi} is transformed into a - clickable link if \texttt{hyperref} support has been enabled}, - abstract = {The computation of \texttt{ionic} solvation free energies from atomistic - simulations is a surprisingly difficult problem that has found no - satisfactory solution for more than 15 years. The reason is that - the charging free energies evaluated from such simulations are - affected by very large errors. One of these is related to the - choice of a specific convention for summing up the contributions - of solvent charges to the electrostatic potential in the ionic - cavity, namely, on the basis of point charges within entire - solvent molecules (M scheme) or on the basis of individual point - charges (P scheme). The use of an inappropriate convention may - lead to a charge-independent offset in the calculated potential, - which depends on the details of the summation scheme, on the - quadrupole-moment trace of the solvent molecule, and on the - approximate form used to represent electrostatic interactions in - the system. However, whether the M or P scheme (if any) - represents the appropriate convention is still a matter of - on-going debate. The goal of the present article is to settle - this long-standing controversy by carefully analyzing (both - analytically and numerically) the properties of the electrostatic - potential in molecular liquids (and inside cavities within them). 
- }, -} - -@article{article:sarfraz-razzak_techni=sectio=algori_2002, - title = {Technical section: {An} algorithm for automatic capturing of the - font outlines}, - author = {M. Sarfraz and M. F. A. Razzak}, - year = {2002}, - journal = {Computers and Graphics}, - volume = {26}, - number = {5}, - pages = {795--804}, - issn = {0097-8493}, - annotation = {An \texttt{article} entry with an \texttt{issn} field}, -} - -@article{article:reese_georgi=anglos=diplom_1958, - title = {Georgia in {Anglo-Spanish} Diplomacy, 1736--1739}, - author = {Reese, Trevor R.}, - series = {3}, - volume = {15}, - pages = {168--190}, - journaltitle = {William and Mary Quarterly}, - date = {1958}, - langid = {english}, - langidopts = {variant=american}, - annotation = {An \texttt{article} entry with a \texttt{series} and a \texttt - {volume} field. Note the format of the series. If the value of - the \texttt{series} field is an integer, this number is printed - as an ordinal and the string \enquote*{series} is appended - automatically}, -} - -@article{article:shore_twiceb=once=concei_1991, - title = {Twice-Born, Once Conceived}, - author = {Shore, Bradd}, - series = {newseries}, - volume = {93}, - number = {1}, - pages = {9--27}, - journaltitle = {American Anthropologist}, - date = {1991-03}, - subtitle = {Meaning Construction and Cultural Cognition}, - annotation = {An \texttt{article} entry with \texttt{series}, \texttt{volume - }, and \texttt{number} fields. 
Note the format of the \texttt{ - series} which is a localization key}, -} - -@article{article:sigfridsson-ryde_compar=method=derivi_1998, - title = {Comparison of methods for deriving atomic charges from the - electrostatic potential and moments}, - author = {Sigfridsson, Emma and Ryde, Ulf}, - volume = {19}, - number = {4}, - pages = {377--395}, - doi = {10.1002/(SICI)1096-987X(199803)19:4<377::AID-JCC1>3.0.CO;2-P}, - journaltitle = {Journal of Computational Chemistry}, - date = {1998}, - langid = {english}, - langidopts = {variant=american}, - indextitle = {Methods for deriving atomic charges}, - annotation = {An \texttt{article} entry with \texttt{volume}, \texttt{number - }, and \texttt{doi} fields. Note that the \textsc{doi} is - transformed into a clickable link if \texttt{hyperref} support - has been enabled}, - abstract = {Four methods for deriving partial atomic charges from the - quantum chemical electrostatic potential (CHELP, CHELPG, - Merz-Kollman, and RESP) have been compared and critically - evaluated. It is shown that charges strongly depend on how and - where the potential points are selected. Two alternative methods - are suggested to avoid the arbitrariness in the point-selection - schemes and van der Waals exclusion radii: CHELP-BOW, which also - estimates the charges from the electrostatic potential, but with - potential points that are Boltzmann-weighted after their - occurrence in actual simulations using the energy function of the - program in which the charges will be used, and CHELMO, which - estimates the charges directly from the electrostatic multipole - moments. 
Different criteria for the quality of the charges are - discussed.}, -} - -@article{article:spiegelberg_intent=intent=schola_1969, - title = {\mkbibquote{Intention} und \mkbibquote{Intentionalit{\"a}t} in der - Scholastik, bei Brentano und Husserl}, - shorttitle = {Intention und Intentionalit{\"a}t}, - author = {Spiegelberg, Herbert}, - volume = {29}, - pages = {189--216}, - journaltitle = {Studia Philosophica}, - date = {1969}, - langid = {german}, - sorttitle = {Intention und Intentionalitat in der Scholastik, bei Brentano - und Husserl}, - indexsorttitle = {Intention und Intentionalitat in der Scholastik, bei - Brentano und Husserl}, - annotation = {An \texttt{article} entry. Note the \texttt{sorttitle} and - \texttt{indexsorttitle} fields and the markup of the quotes in - the database file}, -} - -@article{article:springer_mediae=pilgri=routes_1950, - title = {Mediaeval Pilgrim Routes from {Scandinavia} to {Rome}}, - shorttitle = {Mediaeval Pilgrim Routes}, - author = {Springer, Otto}, - volume = {12}, - pages = {92--122}, - journaltitle = {Mediaeval Studies}, - date = {1950}, - langid = {english}, - langidopts = {variant=british}, - annotation = {A plain \texttt{article} entry}, -} - -@article{article:weinberg_model=lepton_1967, - title = {A Model of Leptons}, - author = {Weinberg, Steven}, - volume = {19}, - pages = {1264--1266}, - journaltitle = {Phys.~Rev.~Lett.}, - date = {1967}, -} - -@string{anch-ie = {Angew.~Chem. Int.~Ed.}} - -@string{cup = {Cambridge University Press}} - -@string{dtv = {Deutscher Taschenbuch-Verlag}} - -@string{hup = {Harvard University Press}} - -@string{jams = {J.~Amer. Math. Soc.}} - -@string{jchph = {J.~Chem. Phys.}} - -@string{jomch = {J.~Organomet. 
Chem.}} - -@string{pup = {Princeton University Press}} - -@incollection{incollection:westfahl_true=fronti, - title = {The True Frontier}, - author = {Westfahl, Gary}, - pages = {55--65}, - subtitle = {Confronting and Avoiding the Realities of Space in {American} - Science Fiction Films}, - crossref = {westfahl:frontier}, - langid = {english}, - langidopts = {variant=american}, - indextitle = {True Frontier, The}, - annotation = {A cross-referenced article from a \texttt{collection}. This is - an \texttt{incollection} entry with a \texttt{crossref} field. - Note the \texttt{subtitle} and \texttt{indextitle} fields}, -} - -@article{article:yoon-ryu_pallad=pincer=comple_2006, - title = {Palladium pincer complexes with reduced bond angle strain: - efficient catalysts for the {Heck} reaction}, - author = {Yoon, Myeong S. and Ryu, Dowook and Kim, Jeongryul and Ahn, Kyo - Han}, - volume = {25}, - number = {10}, - pages = {2409--2411}, - journaltitle = {Organometallics}, - date = {2006}, - indextitle = {Palladium pincer complexes}, -} -- cgit v1.2.3 From b582588b642e8a38956df2b573ae2be51f19e082 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Tue, 14 Oct 2025 16:50:31 +0200 Subject: correct some typos --- src/bibiman/citekeys/citekey_utils.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs index 61a1804..b8f5600 100644 --- a/src/bibiman/citekeys/citekey_utils.rs +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -38,11 +38,12 @@ pub(super) fn formatting_help() { ), formatdoc!("{}", "USAGE".bold()), formatdoc!( - "\t{} {} {} {}\n", + "\t{} {} {} {} {}\n", env!("CARGO_PKG_NAME").fg::().bold(), "format-citekeys".bold(), + "[OPTIONS]".bold(), "--source=".bold(), - "--output=".bold() + "[--output=]".bold() ), formatdoc!( " @@ -96,10 +97,10 @@ pub(super) fn formatting_help() { formatdoc!( " \t{} - \tWhen formatting citekeys also rename all PDFs and notefiles - 
\tfollowing the bibiman citekey-basename scheme at the locations - \tset in the config file. This option can break file paths. Try - \twith {} first! + \tWhen this option is set, bibiman will also rename all PDFs and + \tnotefiles following the bibiman citekey-basename scheme at the + \tlocations set in the config file. This option can break file paths. + \tTry with {} first! ", "-u, --update-attachments".fg::().bold(), "--dry-run".bold() -- cgit v1.2.3 From 2dc231247757a9a80b1925ed215f53f54eececa5 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Wed, 15 Oct 2025 07:28:20 +0200 Subject: fix tests, remove unneeded imports, add description --- src/bibiman/citekeys.rs | 6 +++--- src/bibiman/citekeys/citekey_utils.rs | 3 --- src/config.rs | 1 + 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 8f70ab0..fdeed14 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -241,9 +241,9 @@ impl<'a> CitekeyFormatting<'a> { /// That will prevent the replacement longer key parts that equal a full shorter /// key. /// - /// You are **very encouraged** to call this method before `update_file()` to - /// prevent replacing citekeys partly which afterwards wont match the pattern - /// anymore. + /// You are **very encouraged** to call this method before `update_file()` + /// or `update_notes_pdfs` to prevent replacing citekeys partly which + /// afterwards wont match the pattern anymore. pub fn rev_sort_new_keys_by_len(mut self) -> Self { self.old_new_keys_map .sort_by(|a, b| b.0.len().cmp(&a.0.len())); diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs index b8f5600..773a2d2 100644 --- a/src/bibiman/citekeys/citekey_utils.rs +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -15,10 +15,7 @@ // along with this program. If not, see . 
///// -use std::sync::LazyLock; - use biblatex::{ChunksExt, Entry, Type}; -use deunicode::deunicode; use indoc::formatdoc; use owo_colors::{ OwoColorize, diff --git a/src/config.rs b/src/config.rs index b8d8b45..47e145c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -501,6 +501,7 @@ mod tests { year_color = "135" [citekey_formatter] + ascii_only = true "#, )?; -- cgit v1.2.3