From 67afd67d4d51a00079269d431a7058fc50750886 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Tue, 7 Oct 2025 15:05:47 +0200 Subject: implement basic citekey formatting: * Reads patterns and parses them. TODO: * **Fully** sanitize Latex macros * Preprocess complex and regularly used fields like `author` * Write changes to original bib file --- src/config.rs | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/config.rs') diff --git a/src/config.rs b/src/config.rs index 00a35b7..78cfef9 100644 --- a/src/config.rs +++ b/src/config.rs @@ -102,6 +102,7 @@ const DEFAULT_CONFIG: &str = r##" pub struct BibiConfig { pub general: General, pub colors: Colors, + pub citekey_formatter: CitekeyFormatter, } /// Substruct [general] in config.toml @@ -143,6 +144,11 @@ pub struct Colors { pub year_color: Color, } +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct CitekeyFormatter { + pub fields: Vec, +} + impl Default for BibiConfig { fn default() -> Self { Self { @@ -161,6 +167,7 @@ impl Default for BibiConfig { custom_column: CustomField::Pubtype, }, colors: Self::dark_colors(), + citekey_formatter: CitekeyFormatter { fields: Vec::new() }, } } } @@ -187,6 +194,7 @@ impl BibiConfig { } else { Self::dark_colors() }, + citekey_formatter: CitekeyFormatter { fields: Vec::new() }, } } -- cgit v1.2.3 From a07359a9a1da0c06c040f77158be31b3883b33ac Mon Sep 17 00:00:00 2001 From: lukeflo Date: Wed, 8 Oct 2025 13:49:06 +0200 Subject: refine matching and preformatting of fields for citekey formattin; add case field and enum --- Cross.toml | 6 ++ src/bibiman/citekeys.rs | 136 ++++++++++++++++++++++++++--- src/bibiman/sanitize.rs | 10 ++- src/bibiman/sanitize/optimized_sanitize.rs | 28 +++++- src/config.rs | 24 +++-- 5 files changed, 177 insertions(+), 27 deletions(-) (limited to 'src/config.rs') diff --git a/Cross.toml b/Cross.toml index e7cd27b..6140bf2 100644 --- a/Cross.toml +++ b/Cross.toml @@ -9,3 +9,9 @@ pre-build = [ "dpkg --add-architecture $CROSS_DEB_ARCH", 
"apt-get update && apt-get install --assume-yes libssl-dev:$CROSS_DEB_ARCH", ] + +[target.x86_64-unknown-freebsd] +# pre-build = [ +# "dpkg --add-architecture $CROSS_DEB_ARCH", +# "apt-get update && apt-get install --assume-yes libssl-dev:$CROSS_DEB_ARCH", +# ] diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 4c36e80..a304e92 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -1,13 +1,40 @@ -use biblatex::Bibliography; +// bibiman - a TUI for managing BibLaTeX databases +// Copyright (C) 2025 lukeflo +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+///// + +use biblatex::{Bibliography, ChunksExt, Entry, Type}; use color_eyre::eyre::eyre; use owo_colors::OwoColorize; +use serde::{Deserialize, Serialize}; + +use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig}; -use crate::config::BibiConfig; +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub(crate) enum CitekeyCase { + #[serde(alias = "uppercase", alias = "upper")] + Upper, + #[serde(alias = "lowercase", alias = "lower")] + Lower, +} #[derive(Debug, Default, Clone)] pub(crate) struct CitekeyFormatting { bib_entries: Bibliography, fields: Vec, + case: Option, } impl CitekeyFormatting { @@ -27,6 +54,7 @@ impl CitekeyFormatting { Ok(Self { bib_entries, fields, + case: cfg.citekey_formatter.case.clone(), }) } @@ -38,17 +66,36 @@ impl CitekeyFormatting { split_formatting_pat(pattern); let formatted_field_str = { let mut formatted_str = String::new(); - let field = entry.get_as::(field).expect(&format!( - "Couldn't find field {}", - field.bold().bright_red() - )); + let field = preformat_field(field, entry); + // let field = if let Ok(val) = entry.get_as::(field) { + // val + // } else { + // eprintln!( + // "Unable to get field {} for entry {}", + // field.bright_red(), + // &entry.key.bold() + // ); + // continue; + // }; + // let field = entry.get_as::(field).expect(&format!( + // "Couldn't find field {}", + // field.bold().bright_red() + // )); let mut split_field = field.split_whitespace(); let mut words_passed = 0; + let word_count = if let Some(val) = word_count { + val + } else { + field.split_whitespace().count() + // split_field.size_hint().0 + 1 + }; + dbg!(word_count); loop { if let Some(field_slice) = split_field.next() { formatted_str = formatted_str + format_word(field_slice, char_count); words_passed += 1; - if word_count.is_some_and(|count| count == words_passed) { + // if word_count.is_some_and(|count| count == words_passed) { + if word_count == words_passed { formatted_str = formatted_str + 
trailing_delimiter.unwrap_or(""); break; } else { @@ -68,7 +115,51 @@ impl CitekeyFormatting { } } -fn preformat_field() {} +/// Preformat some fields which are very common to be used in citekeys +fn preformat_field(field: &str, entry: &mut Entry) -> String { + match field { + "title" => { + sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("NA".into())) + } + "author" => { + if let Ok(authors) = entry.author() { + let mut last_names = String::new(); + for a in authors.iter() { + last_names = last_names + &a.name + " "; + } + dbg!(&last_names); + last_names + } else { + "NA".to_string() + } + } + "year" => { + if let Ok(date) = entry.date() { + date.to_chunks().format_verbatim()[..4].to_string() + } else { + entry.get_as::(field).unwrap_or("NA".into()) + } + } + "subtitle" => { + sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("NA".into())) + } + "editor" => { + if let Ok(editors) = entry.editors() { + let mut last_names = String::new(); + for editortypes in editors.iter() { + for e in editortypes.0.iter() { + last_names = last_names + &e.name + " "; + } + } + last_names + } else { + "NA".to_string() + } + } + "pubtype" | "entrytype" => entry.entry_type.to_string(), + _ => entry.get_as::(field).unwrap_or("Empty".into()), + } +} /// Cut of word at char count index if its set fn format_word(word: &str, count: Option) -> &str { @@ -122,7 +213,7 @@ mod tests { use biblatex::Bibliography; use itertools::Itertools; - use crate::bibiman::citekeys::{CitekeyFormatting, split_formatting_pat}; + use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting, split_formatting_pat}; #[test] fn split_citekey_pattern() { @@ -149,19 +240,36 @@ mod tests { #[test] fn format_citekey_test() { - let src = r"@book{tolkien1937, author = {Tolkien}, title = {\enquote{Lord} of the \textbf{Rings}}, year = {1937}}"; + let src = r" + @book{bhambra_colonialism_social_theory_2021, + title = {Colonialism and \textbf{Modern Social Theory}}, + author = {Bhambra, Gurminder 
K. and Holmwood, John}, + location = {Cambridge and Medford}, + publisher = {Polity Press}, + date = {2021}, + } + "; let bibliography = Bibliography::parse(src).unwrap(); let mut formatting_struct = CitekeyFormatting { bib_entries: bibliography, fields: vec![ - "author;1;;-;_".into(), - "title;3;3;_;_".into(), + "entrytype;;;;:".into(), + "author;;;-;_".into(), + "title;4;3;_;_".into(), + "location;;4;:;_".into(), "year".into(), ], + case: None, }; formatting_struct.do_formatting(); let keys = formatting_struct.bib_entries.keys().collect_vec(); - assert_eq!(keys[0], "Tolkien_Lor_of_the_1937"); - assert_eq!(keys[0].to_lowercase(), "tolkien_lor_of_the_1937"); + assert_eq!( + keys[0], + "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021" + ); + assert_eq!( + keys[0].to_lowercase(), + "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021" + ); } } diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs index 9ccf4c4..8c1cc43 100644 --- a/src/bibiman/sanitize.rs +++ b/src/bibiman/sanitize.rs @@ -26,12 +26,12 @@ use optimized_sanitize::optimized_sanitize; macro_rules! optimized_sanitize_bibidata { ($bibidata:expr) => { SanitizedBibiData { - title: optimized_sanitize(&$bibidata.title), + title: optimized_sanitize(false, &$bibidata.title), subtitle: match &$bibidata.subtitle { None => None, - Some(subtitle) => Some(optimized_sanitize(subtitle)), + Some(subtitle) => Some(optimized_sanitize(false, subtitle)), }, - abstract_text: optimized_sanitize(&$bibidata.abstract_text), + abstract_text: optimized_sanitize(false, &$bibidata.abstract_text), } }; } @@ -41,3 +41,7 @@ macro_rules! 
optimized_sanitize_bibidata { pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData { optimized_sanitize_bibidata!(bibidata) } + +pub fn sanitize_single_string_fully(input: &str) -> String { + optimized_sanitize(true, input) +} diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs index 336cc56..dff4d32 100644 --- a/src/bibiman/sanitize/optimized_sanitize.rs +++ b/src/bibiman/sanitize/optimized_sanitize.rs @@ -31,6 +31,17 @@ static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = ph r"\textsc" => ("", Some("")), }; +static LOOKUP_CLEAR_ALL: phf::Map<&'static str, (&'static str, Option<&'static str>)> = phf_map! { + r"\mkbibquote" => ("", Some("")), + r"\enquote*" => ("", Some("")), + r"\enquote" => ("", Some("")), + r"\hyphen" => ("", None), + r"\textbf" => ("", Some("")), + r"\textit" => ("", Some("")), + r"\texttt" => ("", Some("")), + r"\textsc" => ("", Some("")), +}; + #[derive(Logos, Debug)] enum Token { #[token("{")] @@ -43,7 +54,12 @@ enum Token { ForcedSpace, } -pub fn optimized_sanitize(input_text: &str) -> String { +pub fn optimized_sanitize(clear_all: bool, input_text: &str) -> String { + let lookup = if clear_all { + &LOOKUP_CLEAR_ALL + } else { + &LOOKUP + }; let mut char_counter: usize = 0; let mut contains_macro: bool = false; for char in input_text.chars() { @@ -87,7 +103,7 @@ pub fn optimized_sanitize(input_text: &str) -> String { } Token::LaTeXMacro => { let texmacro = lex.slice(); - if let Some(x) = LOOKUP.get(&texmacro.trim_end()) { + if let Some(x) = lookup.get(&texmacro.trim_end()) { if let Some(end) = x.1 { bc_up = true; counter_actions.insert(bracket_counter + 1, end); @@ -115,11 +131,17 @@ mod tests { #[test] fn check_sanitization() { let result = optimized_sanitize( + false, r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}", ); assert_eq!( "\"Intention\" und \"Intentionen \"sind\" - bibquote-.\"", result 
- ) + ); + let result = optimized_sanitize( + true, + r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}", + ); + assert_eq!("Intention und Intentionen sind bibquote.", result) } } diff --git a/src/config.rs b/src/config.rs index 78cfef9..8a333e4 100644 --- a/src/config.rs +++ b/src/config.rs @@ -16,21 +16,24 @@ ///// use std::{ - fs::{create_dir_all, File}, - io::{stdin, Write}, + fs::{File, create_dir_all}, + io::{Write, stdin}, path::PathBuf, str::FromStr, }; use color_eyre::{eyre::Result, owo_colors::OwoColorize}; use figment::{ - providers::{Format, Serialized, Toml}, Figment, + providers::{Format, Serialized, Toml}, }; use ratatui::style::Color; use serde::{Deserialize, Serialize}; -use crate::{bibiman::bibisetup::CustomField, cliargs::CLIArgs}; +use crate::{ + bibiman::{bibisetup::CustomField, citekeys::CitekeyCase}, + cliargs::CLIArgs, +}; const DEFAULT_CONFIG: &str = r##" # [general] @@ -147,6 +150,7 @@ pub struct Colors { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct CitekeyFormatter { pub fields: Vec, + pub case: Option, } impl Default for BibiConfig { @@ -167,7 +171,10 @@ impl Default for BibiConfig { custom_column: CustomField::Pubtype, }, colors: Self::dark_colors(), - citekey_formatter: CitekeyFormatter { fields: Vec::new() }, + citekey_formatter: CitekeyFormatter { + fields: Vec::new(), + case: None, + }, } } } @@ -194,7 +201,10 @@ impl BibiConfig { } else { Self::dark_colors() }, - citekey_formatter: CitekeyFormatter { fields: Vec::new() }, + citekey_formatter: CitekeyFormatter { + fields: Vec::new(), + case: None, + }, } } @@ -352,8 +362,8 @@ fn select_opener() -> String { #[cfg(test)] mod tests { use figment::{ - providers::{Format, Toml}, Figment, + providers::{Format, Toml}, }; use super::BibiConfig; -- cgit v1.2.3 From 8b858f92da69cfb8fa43ec861cda46eeb6ef4bbe Mon Sep 17 00:00:00 2001 From: lukeflo Date: Wed, 8 Oct 2025 14:39:46 +0200 Subject: case parsing from config, 
needs to be implemented for citekey struct --- src/bibiman/citekeys.rs | 95 +++++++++++++++++++++++-------------------------- src/config.rs | 8 +++-- 2 files changed, 49 insertions(+), 54 deletions(-) (limited to 'src/config.rs') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index a304e92..118ae3e 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -23,7 +23,7 @@ use serde::{Deserialize, Serialize}; use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig}; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub(crate) enum CitekeyCase { +pub enum CitekeyCase { #[serde(alias = "uppercase", alias = "upper")] Upper, #[serde(alias = "lowercase", alias = "lower")] @@ -44,7 +44,11 @@ impl CitekeyFormatting { /// The `Bibliography` inserted will be edited in place with the new citekeys. /// Thus, in the end the `bib_entries` field will hold the updated `Bibliography` pub fn new(cfg: &BibiConfig, bib_entries: Bibliography) -> color_eyre::Result { - let fields = cfg.citekey_formatter.fields.clone(); + let fields = cfg + .citekey_formatter + .fields + .clone() + .expect("Need to define fields in config to format citekeys"); if fields.is_empty() { return Err(eyre!( "To format all citekeys, you need to provide {} values in the config file", @@ -58,65 +62,54 @@ impl CitekeyFormatting { }) } + /// Process the actual formatting. The citekey of every entry will be updated. 
pub fn do_formatting(&mut self) { for entry in self.bib_entries.iter_mut() { - let mut new_citekey = String::new(); - for pattern in self.fields.iter() { - let (field, word_count, char_count, inner_delimiter, trailing_delimiter) = - split_formatting_pat(pattern); - let formatted_field_str = { - let mut formatted_str = String::new(); - let field = preformat_field(field, entry); - // let field = if let Ok(val) = entry.get_as::(field) { - // val - // } else { - // eprintln!( - // "Unable to get field {} for entry {}", - // field.bright_red(), - // &entry.key.bold() - // ); - // continue; - // }; - // let field = entry.get_as::(field).expect(&format!( - // "Couldn't find field {}", - // field.bold().bright_red() - // )); - let mut split_field = field.split_whitespace(); - let mut words_passed = 0; - let word_count = if let Some(val) = word_count { - val + entry.key = build_citekey(entry, &self.fields); + } + } +} + +/// Build the citekey from the patterns defined in the config file +fn build_citekey(entry: &Entry, pattern_fields: &[String]) -> String { + let mut new_citekey = String::new(); + for pattern in pattern_fields.iter() { + let (field, word_count, char_count, inner_delimiter, trailing_delimiter) = + split_formatting_pat(pattern); + let formatted_field_str = { + let mut formatted_str = String::new(); + let field = preformat_field(field, entry); + let mut split_field = field.split_whitespace(); + let mut words_passed = 0; + let word_count = if let Some(val) = word_count { + val + } else { + field.split_whitespace().count() + }; + loop { + if let Some(field_slice) = split_field.next() { + formatted_str = formatted_str + format_word(field_slice, char_count); + words_passed += 1; + if word_count == words_passed { + formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); + break; } else { - field.split_whitespace().count() - // split_field.size_hint().0 + 1 - }; - dbg!(word_count); - loop { - if let Some(field_slice) = split_field.next() { - formatted_str 
= formatted_str + format_word(field_slice, char_count); - words_passed += 1; - // if word_count.is_some_and(|count| count == words_passed) { - if word_count == words_passed { - formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); - break; - } else { - formatted_str = formatted_str + inner_delimiter.unwrap_or("") - } - } else { - formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); - break; - }; + formatted_str = formatted_str + inner_delimiter.unwrap_or("") } - formatted_str + } else { + formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); + break; }; - new_citekey = new_citekey + &formatted_field_str; } - entry.key = new_citekey; - } + formatted_str + }; + new_citekey = new_citekey + &formatted_field_str; } + new_citekey } /// Preformat some fields which are very common to be used in citekeys -fn preformat_field(field: &str, entry: &mut Entry) -> String { +fn preformat_field(field: &str, entry: &Entry) -> String { match field { "title" => { sanitize_single_string_fully(&entry.get_as::(field).unwrap_or("NA".into())) diff --git a/src/config.rs b/src/config.rs index 8a333e4..a5df61c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -149,7 +149,7 @@ pub struct Colors { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct CitekeyFormatter { - pub fields: Vec, + pub fields: Option>, pub case: Option, } @@ -172,7 +172,7 @@ impl Default for BibiConfig { }, colors: Self::dark_colors(), citekey_formatter: CitekeyFormatter { - fields: Vec::new(), + fields: None, case: None, }, } @@ -202,7 +202,7 @@ impl BibiConfig { Self::dark_colors() }, citekey_formatter: CitekeyFormatter { - fields: Vec::new(), + fields: None, case: None, }, } @@ -400,6 +400,8 @@ mod tests { author_color = "38" title_color = "37" year_color = "135" + + [citekey_formatter] "#, )?; -- cgit v1.2.3 From c69b1789fabaf149916d160922d7026f2cbe33f1 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Fri, 10 Oct 2025 14:57:53 +0200 Subject: implement const of ignored 
special chars for citekey formatting * the list contains 33 special chars at the moment * it will only affect already existing special chars in biblatex fields * delimiter specified for citekey formatting are not affected * char count is also not affected, ignored chars are not counted --- src/bibiman/citekeys.rs | 40 +++++++++++++++++++++------------------- src/config.rs | 5 +++++ tests/test-config.toml | 2 +- 3 files changed, 27 insertions(+), 20 deletions(-) (limited to 'src/config.rs') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 5121741..7c06886 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -31,7 +31,10 @@ use owo_colors::{ }; use serde::{Deserialize, Serialize}; -use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig}; +use crate::{ + bibiman::sanitize::sanitize_single_string_fully, + config::{BibiConfig, IGNORED_SPECIAL_CHARS}, +}; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum CitekeyCase { @@ -354,26 +357,25 @@ fn preformat_field(field: &str, entry: &Entry) -> String { /// Cut of word at char count index if its set fn format_word(word: &str, count: Option) -> String { - if let Some(len) = count - && len < word.chars().count() - { - // Since chars can consist of multiple bytes, we need this more complex - // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...) - // instead of simple byte indexing - let mut word_slice = String::new(); - let word_chars = word.chars(); - let mut counter = 0; - for c in word_chars { - if counter == len { - break; - } - word_slice.push(c); - counter += 1; + // Since chars can consist of multiple bytes, we need this more complex + // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...) 
+ // instead of simple byte indexing + let mut word_slice = String::new(); + let word_chars = word.chars(); + let mut counter = 0; + for c in word_chars { + if let Some(len) = count + && counter == len + { + break; } - word_slice - } else { - word.to_string() + if IGNORED_SPECIAL_CHARS.contains(&c) { + continue; + } + word_slice.push(c); + counter += 1; } + word_slice } /// Split a formatting pattern of kind diff --git a/src/config.rs b/src/config.rs index a5df61c..a4e89be 100644 --- a/src/config.rs +++ b/src/config.rs @@ -35,6 +35,11 @@ use crate::{ cliargs::CLIArgs, }; +pub const IGNORED_SPECIAL_CHARS: [char; 33] = [ + '?', '!', '\\', '\'', '.', '-', '–', ':', ',', '[', ']', '(', ')', '{', '}', '§', '$', '%', + '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"', +]; + const DEFAULT_CONFIG: &str = r##" # [general] ## Default files/dirs which are loaded on startup diff --git a/tests/test-config.toml b/tests/test-config.toml index b484b69..558d216 100644 --- a/tests/test-config.toml +++ b/tests/test-config.toml @@ -61,5 +61,5 @@ custom_column = "series" # year_color = "135" [citekey_formatter] -fields = [ "author;2;;-;_", "title;3;3;_;_", "year" ] +fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ] case = "lowercase" -- cgit v1.2.3 From 418d2f3874c8e86c4b58143115ee3d4181130f9c Mon Sep 17 00:00:00 2001 From: lukeflo Date: Fri, 10 Oct 2025 15:09:48 +0200 Subject: add dry-run information to --help function --- src/bibiman/citekeys.rs | 11 +++++++++++ src/config.rs | 15 +++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'src/config.rs') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 7c06886..f7704fb 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -238,6 +238,17 @@ fn formatting_help() { ", "-h, --help".fg::().bold() ), + formatdoc!( + " + \t{} + \tDon't apply any changes to the named files. 
Instead print all + \told citekeys and the formatted strings that would have been + \tapplied in the format: {} => {} + ", + "-d, --dry-run".fg::().bold(), + "old_key".italic(), + "new_key".bold() + ), formatdoc! {" \t{} \tThe bibfile for which the citekey formatting should be processed. diff --git a/src/config.rs b/src/config.rs index a4e89be..b1c4b07 100644 --- a/src/config.rs +++ b/src/config.rs @@ -103,6 +103,21 @@ const DEFAULT_CONFIG: &str = r##" # author_color = "38" # title_color = "37" # year_color = "135" + +# [citekey_formatter] +## Define the patterns for creating citekeys. Every item of the array consists of +## five components separated by semicolons. Despite the field name every component +## can be left blank: +## - name of the biblatex field ("author", "title"...) +## - number of max words from the given field +## - number of chars used from each word +## - delimiter to separate words of the same field +## - trailing delimiter separating the current field from the following +# fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ] + +## Convert chars to specified case. Possible values: +## "upper", "uppercase", "lower", "lowercase" +# case = "lowercase" "##; /// Main struct of the config file. 
Contains substructs/headings in toml -- cgit v1.2.3 From 467851007e1861834326deee3116aa88fe839f5a Mon Sep 17 00:00:00 2001 From: lukeflo Date: Mon, 13 Oct 2025 15:45:53 +0200 Subject: Working proof of concept of citekey formatting --- CITEKEYS.md | 215 +++++++++++++++ Cargo.lock | 7 + Cargo.toml | 1 + README.md | 20 ++ src/bibiman/citekeys.rs | 69 +++-- src/bibiman/citekeys/citekey_utils.rs | 105 ++++---- src/config.rs | 69 +++++ tests/biblatex-test-citekeys.bib | 476 ++++++++++++++++++++++++++++++++++ tests/test-config.toml | 5 +- 9 files changed, 889 insertions(+), 78 deletions(-) create mode 100644 CITEKEYS.md create mode 100644 tests/biblatex-test-citekeys.bib (limited to 'src/config.rs') diff --git a/CITEKEYS.md b/CITEKEYS.md new file mode 100644 index 0000000..912326a --- /dev/null +++ b/CITEKEYS.md @@ -0,0 +1,215 @@ +# Formatting Citekeys + + + +- [Formatting Citekeys](#formatting-citekeys) + - [Settings](#settings) + - [Building Patterns](#building-patterns) + - [Ignore Lists and Char Case](#ignore-lists-and-char-case) + - [General Tipps](#general-tipps) + - [Examples](#examples) + + + +`bibiman` offers the possibility to create new citekeys from the fields of +BibLaTeX entries. This is done using an easy but powerful pattern-matching +syntax. + +## Settings + +All settings for the citekey generation have to be configured in the used config +file. The regular path is `XDG_CONFIG_DIR/bibiman/bibiman.toml`. But it can be +set dynamically with the `-c`/`--config=` global option. + +Following values can be set through the config file. 
A detailed explanation for +all fields follows below: + +```toml +[citekey_formatter] +fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ] +case = "lowercase" +ascii_only = true +ignored_chars = [ + "?", "!", "\\", "\'", ".", "-", "–", ":", ",", "[", "]", "(", ")", "{", "}", "§", "$", "%", "&", "/", "`", "´", "#", "+", "*", "=", "|", "<", ">", "^", "°", "_", "\"", +] +ignored_words = [ + "the", + "a", + "an", + "of", + "for", + "in", + "at", + "to", + "and", + "der", + "die", + "das", + "ein", + "eine", + "eines", + "des", + "auf", + "und", + "für", + "vor", +] +``` + +## Building Patterns + +The main ingredients for generating citekeys are the field patterns. They can be set +through an array in the config file where every array-item represents a single +BibLaTeX field to be used for generating a part of the citekey. + +Every field pattern consists of the following five parts separated by +semicolons. The general pattern looks like this (every subfield is explained +below): + +*biblatex field name* **;** *max word count* **;** *max char count* **;** *inner delimiter* **;** *trailing delimiter* + +- **BibLaTeX field**: the first part is the name of the field whose value + should be used to generate the content part of the citekey. Theoretically, any + BibLaTeX field can be selected by name. But there are some fields which are + much more common than others; e.g. `author`, `editor`, `title`, `year`/`date` + or `entrytype`. Those very common fields are preprocessed; meaning that for + instance LaTeX macros are fully stripped from the strings, or that `editor` is + a fallback value for `author` if the latter is empty (however, setting + `editor` explicitly is still possible). Also using `year` will parse the + `date` field too, to ensure a year number. +- **Max Word**: Defines the maximum number of words to be used from the named + field. E.g. if the title consists of five words, and the max counter is set to + `3` only the first three words will be used. 
+- **Max Chars/Word**: Defines how many chars, counting from the start, of each + word will be used to build the citekey. If for instance the value is set to + `5`, only the first five chars of any word will be used. Thus, "archaeology" + would be stripped down to "archa". +- **Inner Delimiter**: Sets the delimiter char used between words from the + currently named field; e.g. to separate the words of the `title` field. +- **Trailing Delimiter**: Sets the delimiter which separates the current field's + value from the following. This delimiter is only printed if the following + field has some content. + +For example, to use the `title` field, print at most three words and of those +only the first five chars, single words separated by underscore and the whole +field separated by equal sign, insert the following pattern field into the +`fields` array: + +`title;3;5;_;=` + +Except for the BibLaTeX field name, all other parts of the pattern can be left +blank. If the field name is the only value set, semicolon delimiters are also +not necessary. But if only one of the following parts should be set, all +delimiters need to be used. E.g. these are both valid: `title` or `title;;;_;=`. +The first would print all words of the title, no matter the length, not +separated by any char. The latter would also print all words of the title, but +single words separated by underscores and the whole pattern value separated from +the following by an equal sign. This is not valid: `title;;_` since `bibiman` +can't know if the underscore means a delimiter (and which) or the max char +count. + +The pattern array inside the config file takes multiple pattern fields like the +preceding. This allows an elaborate citekey pattern which takes into account +multiple fields. + +## Ignore Lists and Char Case + +Besides the field patterns there are some other options to define how citekeys +should be built. 
+ +`ascii_only=` +: If set to `true`, which is the default, non-ascii chars are mapped to their + ascii equivalent. For example, the German `ä` would be mapped to `a`. The + Turkish `ş` or Greek `σ`/`ς` would be mapped to `s`. If set to `false` all are + kept as they are. But this could lead to errors running LaTeX on the file. + +`case=` +: If used, sets the case of the chars in the citekey. Valid values are + `uppercase`, `lowercase` or `camelcase`. The first two should be self-explanatory; the + latter means camel case in which the *first word* also begins with an + uppercase letter; also known as upper camel case or Pascal case. + +`ignored_chars=` +: Defines chars which should be ignored during parsing (meaning they are not printed). + The default list contains 33 special chars and is part of the default config + file (commented out). Be aware, setting this key will completely + overwrite the default list! + +`ignored_words=` +: A list of words which should be ignored when parsing field values. The default list + contains about 20 very commonly used words in English and German; like + articles, pronouns or connector words. Like with `ignored_chars` setting this + key will completely overwrite the default list! + +## General Tipps + +- Most importantly: *always use the **`--dry-run`** option first*! This will + print a list of old and new values for all citekeys in the file without + changing anything. +- After finding a good overall pattern, *use the `--output=` option* to create a + new file and don't overwrite your existing file. Thus, your original file + isn't broken if the key formatter produces some unwanted output. +- Although very long patterns are possible, they are not encouraged, since they bloat + the bibfiles. +- The same goes for *too short* patterns; if the pattern is too unspecific, + it bears the risk of producing duplicates (e.g. single author and year only). + But the citekey generator will not check for duplicates! 
+- It is possible to keep special chars and use them as delimiters. But this + might cause problems with other programs and CLI tools in particular, since many + special chars are reserved for shell operations. For instance, it will very + likely break the note file feature of `bibiman` which doesn't accept many + special chars. + +## Examples + +To make the process clearer, a few examples might help. The following bibfile is +assumed: + +```latex +@article{Bos2023, + title = {{LaTeX}, metadata, and publishing workflows}, + author = {Bos, Joppe W. and {McCurley}, Kevin S.}, + year = {2023}, + month = apr, + journal = {arXiv}, + number = {{arXiv}:2301.08277}, + doi = {10.48550/arXiv.2301.08277}, + url = {http://arxiv.org/abs/2301.08277}, + urldate = {2023-08-22}, + note = {type: article}, +} +@book{Bhambra2021, + title = {Colonialism and \textbf{Modern Social Theory}}, + author = {Bhambra, Gurminder K. and Holmwood, John}, + location = {Cambridge and Medford}, + publisher = {Polity Press}, + date = {2021}, +} +``` + +And the following values set in the config file: + +```toml +fields = [ + # Just print the whole entrytype and a colon as trailing delimiter + "entrytype;;;;:", + # Print all author names in full length, names separated by dash, + # the whole field by underscore + "author;;;-;_", + # Print first 4 words of title, first 3 chars of every word only. Title words + # separated by equal sign, the whole field by underscore + "title;4;3;=;_", + # Print all words of location, but only first 4 chars of every word. Single words 
Single words + # separated by colon, whole field by underscore + "location;;4;:;_", + # Just print the whole year + "year", +] +case = "lowercase" +ascii_only = true +``` + +The combination of these settings will produce the following citekeys: + +- **`article:bos-mccurley_lat=met=pub=wor_2023`** +- **`book:bhambra-holmwood_col=mod=soc=the_camb:medf_2021`** diff --git a/Cargo.lock b/Cargo.lock index a27636e..0adb4e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -103,6 +103,7 @@ dependencies = [ "biblatex", "color-eyre", "crossterm", + "deunicode", "dirs", "editor-command", "figment", @@ -323,6 +324,12 @@ dependencies = [ "syn", ] +[[package]] +name = "deunicode" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" + [[package]] name = "dirs" version = "5.0.1" diff --git a/Cargo.toml b/Cargo.toml index abf1eee..0c07c51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ owo-colors = "4.2.2" logos = "0.15.1" phf = { version = "0.13.1", features = ["macros"] } indoc = "2.0.6" +deunicode = "1.6.2" [workspace.metadata.cross.target.aarch64-unknown-linux-gnu] # Install libssl-dev:arm64, see diff --git a/README.md b/README.md index 4929509..3fb81c8 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,11 @@ - [Ubuntu/Debian](#ubuntudebian) - [Void Linux](#void-linux) - [Usage](#usage) + - [CLI for citekey formatting](#cli-for-citekey-formatting) - [Configuration](#configuration) - [Location of Config File](#location-of-config-file) - [General Configuration](#general-configuration) + - [Citekey formatting](#citekey-formatting) - [Color Configuration](#color-configuration) - [Features](#features) - [Keybindings](#keybindings) @@ -196,6 +198,13 @@ bibman tests/multi-files/ bibiman tests/biblatex-test.bib tests/multi-files/ ``` +### CLI for citekey formatting + +Besides the TUI, `bibiman` can format and replace citekeys.
To make use of this +feature, run the program with the `format-citekeys` subcommand. For more +information on this use `bibiman format-citekeys --help` and the +[docs](./CITEKEYS.md). + ## Configuration ### Location of Config File @@ -268,6 +277,11 @@ note_symbol = "󰧮" ## Possible values are "journaltitle", "organization", "instituion", "publisher" ## and "pubtype" (which is the default) custom_column = "pubtype" + +[citekey_formatter] +fields = [] +ascii_only = true +case = "lowercase" ``` `bibfiles` @@ -326,6 +340,12 @@ custom_column = "pubtype" good advice to use a rather wide terminal window when using a value like `journaltitle`. +### Citekey formatting + +`bibiman` now also offers a citekey generating feature. This makes it possible to reformat +all citekeys based on an elaborate pattern matching syntax. For further +information and examples see the [docs](CITEKEYS.md). + ### Color Configuration Furthermore, it is now possible to customize the colors. The following values diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 2f56947..0cec28e 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -29,7 +29,7 @@ use serde::{Deserialize, Serialize}; use crate::{ bibiman::citekeys::citekey_utils::{build_citekey, formatting_help}, - config::BibiConfig, + config::{BibiConfig, IGNORED_SPECIAL_CHARS, IGNORED_WORDS}, }; mod citekey_utils; @@ -60,6 +60,7 @@ pub(crate) struct CitekeyFormatting { case: Option, old_new_keys_map: Vec<(String, String)>, dry_run: bool, + ascii_only: bool, } impl CitekeyFormatting { @@ -69,14 +70,15 @@ impl CitekeyFormatting { ) -> color_eyre::Result<()> { let mut formatter = CitekeyFormatting::default(); - formatter.fields = cfg - .citekey_formatter - .fields - .clone() - .ok_or_eyre("Need to define fields correctly in config file")?; + formatter.fields = cfg.citekey_formatter.fields.clone().ok_or_eyre(format!( + "Need to define {} correctly in config file", + "citekey pattern fields".red() + ))?; formatter.case =
cfg.citekey_formatter.case.clone(); + formatter.ascii_only = cfg.citekey_formatter.ascii_only; + if formatter.fields.is_empty() { return Err(eyre!( "To format all citekeys, you need to provide {} values in the config file", @@ -105,13 +107,26 @@ impl CitekeyFormatting { formatter.bib_entries = Bibliography::parse(&bibstring) .map_err(|e| eyre!("Couldn't parse bibfile due to {}", e.kind))?; + let ignored_chars = if let Some(chars) = &cfg.citekey_formatter.ignored_chars { + chars.as_slice() + } else { + IGNORED_SPECIAL_CHARS.as_slice() + }; + + let ignored_words = if let Some(words) = &cfg.citekey_formatter.ignored_words { + words.as_slice() + } else { + &*IGNORED_WORDS.as_slice() + }; + formatter - .do_formatting() + .do_formatting(ignored_chars, ignored_words) .rev_sort_new_keys_by_len() .update_file()?; Ok(()) } + /// Start Citekey formatting with building a new instance of `CitekeyFormatting` /// Formatting is processed file by file, because `bibman` can handle /// multi-file setups. @@ -144,16 +159,24 @@ impl CitekeyFormatting { case: cfg.citekey_formatter.case.clone(), old_new_keys_map: Vec::new(), dry_run: false, + ascii_only: cfg.citekey_formatter.ascii_only, }) } /// Process the actual formatting. The citekey of every entry will be updated. 
- pub fn do_formatting(&mut self) -> &mut Self { + pub fn do_formatting(&mut self, ignored_chars: &[char], ignored_words: &[String]) -> &mut Self { let mut old_new_keys: Vec<(String, String)> = Vec::new(); for entry in self.bib_entries.iter() { old_new_keys.push(( entry.key.clone(), - build_citekey(entry, &self.fields, self.case.as_ref()), + build_citekey( + entry, + &self.fields, + self.case.as_ref(), + self.ascii_only, + ignored_chars, + ignored_words, + ), )); } @@ -215,12 +238,15 @@ mod tests { use biblatex::Bibliography; - use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting}; + use crate::{ + bibiman::citekeys::{CitekeyCase, CitekeyFormatting}, + config::{IGNORED_SPECIAL_CHARS, IGNORED_WORDS}, + }; #[test] fn format_citekey_test() { let src = r" - @article{bos_latex_metadata_and_publishing_workflows_2023, + @article{Bos2023, title = {{LaTeX}, metadata, and publishing workflows}, author = {Bos, Joppe W. and {McCurley}, Kevin S.}, year = {2023}, @@ -232,7 +258,7 @@ mod tests { urldate = {2023-08-22}, note = {type: article}, } - @book{bhambra_colonialism_social_theory_2021, + @book{Bhambra2021, title = {Colonialism and \textbf{Modern Social Theory}}, author = {Bhambra, Gurminder K. 
and Holmwood, John}, location = {Cambridge and Medford}, @@ -247,29 +273,24 @@ mod tests { fields: vec![ "entrytype;;;;:".into(), "author;;;-;_".into(), - "title;4;3;_;_".into(), + "title;4;3;=;_".into(), "location;;4;:;_".into(), "year".into(), ], - case: None, + case: Some(CitekeyCase::Lower), old_new_keys_map: Vec::new(), dry_run: false, + ascii_only: true, }; - let _ = formatting_struct.do_formatting(); + let _ = formatting_struct + .do_formatting(IGNORED_SPECIAL_CHARS.as_slice(), &*IGNORED_WORDS.as_slice()); assert_eq!( formatting_struct.old_new_keys_map.get(0).unwrap().1, - "article:Bos-McCurley_LaT_met_and_pub_Empt_2023" + "article:bos-mccurley_lat=met=pub=wor_2023" ); assert_eq!( formatting_struct.old_new_keys_map.get(1).unwrap().1, - "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021" - ); - formatting_struct.case = Some(CitekeyCase::Lower); - let _ = formatting_struct.do_formatting().rev_sort_new_keys_by_len(); - // now the longer citekey is processed first and its in lowercase! 
- assert_eq!( - formatting_struct.old_new_keys_map.get(0).unwrap().1, - "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021" + "book:bhambra-holmwood_col=mod=soc=the_camb:medf_2021" ); } diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs index ee2c849..5f70224 100644 --- a/src/bibiman/citekeys/citekey_utils.rs +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -16,21 +16,14 @@ ///// use biblatex::{ChunksExt, Entry, Type}; +use deunicode::deunicode; use indoc::formatdoc; use owo_colors::{ OwoColorize, colors::{BrightBlue, Green, White}, }; -use crate::{ - bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully}, - config::IGNORED_SPECIAL_CHARS, -}; - -const IGNORE_WORDS: [&str; 20] = [ - "the", "a", "an", "of", "for", "in", "at", "to", "and", "der", "die", "das", "ein", "eine", - "eines", "des", "auf", "und", "für", "vor", -]; +use crate::bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully}; pub(super) fn formatting_help() { let help = vec![ @@ -104,6 +97,9 @@ pub(super) fn build_citekey( entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>, + ascii_only: bool, + ignored_chars: &[char], + ignored_words: &[String], ) -> String { // mut string the citekey is built from let mut new_citekey = String::new(); @@ -114,7 +110,7 @@ pub(super) fn build_citekey( // loop over pattern fields process them 'field_loop: for pattern in pattern_fields.iter() { // parse single values from pattern field - let (field_name, word_count, char_count, inner_delimiter, cur_trailing_delimiter) = + let (field_name, max_words, max_chars, inner_delimiter, cur_trailing_delimiter) = split_formatting_pat(pattern); // built the part of the citekey from the current pattern field @@ -126,16 +122,9 @@ pub(super) fn build_citekey( // split at whitespaces, count fields and set counter for processed // splits - let mut split_field = field.split_whitespace(); + let split_field = field.split_whitespace(); let mut 
words_passed = 0; let field_count = field.split_whitespace().count(); - let word_count = if let Some(val) = word_count - && val <= field_count - { - val - } else { - field_count - }; // If there is a trailing delimiter from the previous field, push it if let Some(del) = trailing_delimiter { @@ -152,47 +141,57 @@ pub(super) fn build_citekey( } // loop over single parts of current field and add correct delimiter - 'word_loop: loop { - // process the single slices and add correct delimiter - if let Some(field_slice) = split_field.next() { - // Create word slice char by char. We need to loop over chars - // instead of a simple bytes index to also catch chars which - // consist of more than one byte (äöüøæ etc...) - let mut word_slice = String::new(); - let word_chars = field_slice.chars(); - let mut counter = 0; - 'char_loop: for mut c in word_chars { - // If camelcase is set, force first char of word to uppercase - if counter == 0 && case == Some(&CitekeyCase::Camel) { - c = c.to_ascii_uppercase() - } - if let Some(len) = char_count - && counter == len - { - break 'char_loop; - } - // if a word slice contains a special char, skip it - if IGNORED_SPECIAL_CHARS.contains(&c) { - continue 'char_loop; - } + // process the single slices and add correct delimiter + 'word_loop: for (idx, field_slice) in split_field.enumerate() { + // if the current slice is a common word from the ignore list, + // skip it. + if ignored_words.contains(&field_slice.to_lowercase()) { + continue; + } + + // Create word slice char by char. We need to loop over chars + // instead of a simple bytes index to also catch chars which + // consist of more than one byte (äöüøæ etc...) 
+ let mut word_slice = String::new(); + let word_chars = field_slice.chars(); + let mut counter = 0; + 'char_loop: for mut c in word_chars { + // If camelcase is set, force first char of word to uppercase + if counter == 0 && case == Some(&CitekeyCase::Camel) { + c = c.to_ascii_uppercase() + } + if let Some(len) = max_chars + && counter >= len + { + break 'char_loop; + } + // if a word slice contains a special char, skip it + if ignored_chars.contains(&c) { + continue 'char_loop; + } + // if non-ascii chars should be mapped, check if needed and do it + if let Some(chars) = deunicode::deunicode_char(c) + && ascii_only + { + word_slice.push_str(chars); + counter += chars.len(); + } else { word_slice.push(c); counter += 1; } - // Don't count empty slices and don't add delimiter to those - if !word_slice.is_empty() { - formatted_str = formatted_str + &word_slice; - words_passed += 1; - if word_count == words_passed { - break 'word_loop; - } else { - formatted_str = formatted_str + inner_delimiter.unwrap_or(""); - } + } + // Don't count empty slices and don't add delimiter to those + if !word_slice.is_empty() { + formatted_str = formatted_str + &word_slice; + words_passed += 1; + if max_words.is_some_and(|max| max == words_passed) || idx + 1 == field_count { + break 'word_loop; } else { - continue 'word_loop; + formatted_str = formatted_str + inner_delimiter.unwrap_or(""); } } else { - break 'word_loop; - }; + continue 'word_loop; + } } formatted_str }; diff --git a/src/config.rs b/src/config.rs index b1c4b07..7c1a0f8 100644 --- a/src/config.rs +++ b/src/config.rs @@ -20,6 +20,7 @@ use std::{ io::{Write, stdin}, path::PathBuf, str::FromStr, + sync::LazyLock, }; use color_eyre::{eyre::Result, owo_colors::OwoColorize}; @@ -40,6 +41,31 @@ pub const IGNORED_SPECIAL_CHARS: [char; 33] = [ '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"', ]; +pub static IGNORED_WORDS: LazyLock> = LazyLock::new(|| { + vec![ + String::from("the"), + String::from("a"), + 
String::from("an"), + String::from("of"), + String::from("for"), + String::from("in"), + String::from("at"), + String::from("to"), + String::from("and"), + String::from("der"), + String::from("die"), + String::from("das"), + String::from("ein"), + String::from("eine"), + String::from("eines"), + String::from("des"), + String::from("auf"), + String::from("und"), + String::from("für"), + String::from("vor"), + ] +}); + const DEFAULT_CONFIG: &str = r##" # [general] ## Default files/dirs which are loaded on startup @@ -118,6 +144,40 @@ const DEFAULT_CONFIG: &str = r##" ## Convert chars to specified case. Possible values: ## "upper", "uppercase", "lower", "lowercase" # case = "lowercase" + +## Map all unicode chars to their pure ascii equivalent +# ascii_only = true + +## List of special chars that'll be ignored when building citekeys. +## A custom list will overwrite the default list +# ignored_chars = [ +# "?", "!", "\\", "\'", ".", "-", "–", ":", ",", "[", "]", "(", ")", "{", "}", "§", "$", "%", "&", "/", "`", "´", "#", "+", "*", "=", "|", "<", ">", "^", "°", "_", """, +# ] + +## List of words that'll be ignored when building citekeys. +## A custom list will overwrite the default list +# ignored_words = [ +# "the", +# "a", +# "an", +# "of", +# "for", +# "in", +# "at", +# "to", +# "and", +# "der", +# "die", +# "das", +# "ein", +# "eine", +# "eines", +# "des", +# "auf", +# "und", +# "für", +# "vor", +# ] "##; /// Main struct of the config file. 
Contains substructs/headings in toml @@ -171,6 +231,9 @@ pub struct Colors { pub struct CitekeyFormatter { pub fields: Option>, pub case: Option, + pub ascii_only: bool, + pub ignored_chars: Option>, + pub ignored_words: Option>, } impl Default for BibiConfig { @@ -194,6 +257,9 @@ impl Default for BibiConfig { citekey_formatter: CitekeyFormatter { fields: None, case: None, + ascii_only: true, + ignored_chars: None, + ignored_words: None, }, } } @@ -224,6 +290,9 @@ impl BibiConfig { citekey_formatter: CitekeyFormatter { fields: None, case: None, + ascii_only: true, + ignored_chars: None, + ignored_words: None, }, } } diff --git a/tests/biblatex-test-citekeys.bib b/tests/biblatex-test-citekeys.bib new file mode 100644 index 0000000..9767f97 --- /dev/null +++ b/tests/biblatex-test-citekeys.bib @@ -0,0 +1,476 @@ +@set{set, + entryset = {article:herrmann-ofele_carboc=carben=as_2006,article:aksin-turkmen_effect=immobi=on_2006,article:yoon-ryu_pallad=pincer=comple_2006}, + annotation = {A \texttt{set} with three members.}, +} + +@set{set, + entryset = {article:glashow_partia=symmet=weak_1961,article:weinberg_model=lepton_1967,salam}, + annotation = {A \texttt{set} with three members discussing the standard + model of particle physics.}, +} + +@collection{collection:matuz-miller_contem=litera=critic_1990gale, + title = {Contemporary Literary Criticism}, + year = {1990}, + location = {Detroit}, + publisher = {Gale}, + volume = {61}, + pages = {204--208}, + editor = {Matuz, Roger and Miller, Helen}, + keywords = {narration}, + langid = {english}, + langidopts = {variant=american}, + annotation = {A \texttt{collection} entry providing the excerpt information + for the \texttt{article:doody_heming=style=jakes_1974} entry. 
Note the format of the \texttt{ + pages} field}, +} + +@article{article:aksin-turkmen_effect=immobi=on_2006, + title = {Effect of immobilization on catalytic characteristics of saturated + {Pd-N}-heterocyclic carbenes in {Mizoroki-Heck} reactions}, + author = {Aks{\i}n, {\"O}zge and T{\"u}rkmen, Hayati and Artok , Levent and + { \c{C}}etinkaya, Bekir and Ni, Chaoying and B{\" u}y{ \"u}kg{\"u} + ng{ \" o}r, Orhan and {\"O}zkal, Erhan}, + volume = {691}, + number = {13}, + pages = {3027--3036}, + journaltitle = jomch, + date = {2006}, + indextitle = {Effect of immobilization on catalytic characteristics}, +} + +@article{article:angenendt_honore=salvat=vom_2002, + title = {In Honore Salvatoris~-- Vom Sinn und Unsinn der Patrozinienkunde}, + shorttitle = {In Honore Salvatoris}, + author = {Angenendt, Arnold}, + volume = {97}, + pages = {431--456, 791--823}, + journaltitle = {Revue d'Histoire Eccl{\'e}siastique}, + date = {2002}, + langid = {german}, + indextitle = {In Honore Salvatoris}, + annotation = {A German article in a French journal. Apart from that, a + typical \texttt{article} entry. Note the \texttt{indextitle} + field}, +} + +@book{book:aristotle_de=anima_1907cambr#unive#press, + title = {De Anima}, + author = {Aristotle}, + location = {Cambridge}, + publisher = cup, + date = {1907}, + editor = {Hicks, Robert Drew}, + keywords = {primary, ancient, philosophy, athens}, + langid = {english}, + langidopts = {variant=british}, + annotation = {A \texttt{book} entry with an \texttt{author} and an \texttt{ + editor}}, +} + +@book{book:aristotle_physic_1929g#p#putna, + title = {Physics}, + shorttitle = {Physics}, + author = {Aristotle}, + location = {New York}, + publisher = {G. P. Putnam}, + url = {https://www.infobooks.org/authors/classic/aristotle-books/#Physic}, + date = {1929}, + translator = {Wicksteed, P. H. and Cornford, F. 
M.}, + keywords = {primary, ancient, philosophy}, + langid = {english}, + langidopts = {variant=american}, + file = {~/Documents/coding/projects/bibiman/tests/book:aristotle_physic_1929g#p#putna.pdf}, + annotation = {A \texttt{book} entry with a \texttt{translator} field}, + abstract = {The Physics is a work by Aristotle dedicated to the study of + nature. Regarded by Heidegger as "the fundamental work of Western + philosophy", it presents the renowned distinction between the + four types of cause, as well as reflections on chance, motion, + infinity, and other fundamental concepts. It is here that + Aristotle sets out his celebrated paradox of time.}, +} + +@book{book:aristotle_poetic_1968clare#press, + title = {Poetics}, + shorttitle = {Poetics}, + author = {Aristotle}, + location = {Oxford}, + publisher = {Clarendon Press}, + series = {Clarendon {Aristotle}}, + date = {1968}, + editor = {Lucas, D. W.}, + keywords = {primary}, + langid = {english}, + langidopts = {variant=british}, + annotation = {A \texttt{book} entry with an \texttt{author} and an \texttt{ + editor} as well as a \texttt{series} field}, +} + +@mvbook{mvbook:aristotle_rhetor=aristo=with_1877cambr#unive#press, + title = {The \textbf{Rhetoric} of {Aristotle} with a commentary by the late {Edward + Meredith Cope}}, + shorttitle = {Rhetoric}, + author = {Aristotle}, + publisher = cup, + date = {1877}, + editor = {Cope, Edward Meredith}, + commentator = {Cope, Edward Meredith}, + volumes = {3}, + keywords = {primary}, + langid = {english}, + langidopts = {variant=british}, + sorttitle = {Rhetoric of Aristotle}, + indextitle = {Rhetoric of {Aristotle}, The}, + annotation = {A commented edition. 
Note the concatenation of the \texttt{ + editor} and \texttt{commentator} fields as well as the \texttt{ + volumes}, \texttt{sorttitle}, and \texttt{indextitle} fields}, +} + +@book{book:augustine_hetero=cataly=synthe_1995marce#dekke, + title = {Heterogeneous catalysis for the synthetic \textit{chemist}}, + shorttitle = {Heterogeneous catalysis}, + author = {Augustine, Robert L.}, + location = {New York}, + publisher = {Marcel Dekker}, + date = {1995}, + langid = {english}, + langidopts = {variant=american}, + annotation = {A plain \texttt{book} entry}, + keywords = {chemistry}, +} + +@book{book:averroes_epistl=on=possib_1982jewis#theol#semin#ameri, + title = {The Epistle on the Possibility of Conjunction with the Active + Intellect by {Ibn Rushd} with the Commentary of {Moses Narboni}}, + shorttitle = {Possibility of Conjunction}, + author = {Averroes}, + location = {New York}, + publisher = {Jewish Theological Seminary of America}, + series = {Moreshet: Studies in {Jewish} History, Literature and Thought}, + number = {7}, + date = {1982}, + editor = {Bland, Kalman P.}, + translator = {Bland, Kalman P.}, + keywords = {primary}, + langid = {english}, + langidopts = {variant=american}, + indextitle = {Epistle on the Possibility of Conjunction, The}, + annotation = {A \texttt{book} entry with a \texttt{series} and a \texttt{ + number}. Note the concatenation of the \texttt{editor} and + \texttt{translator} fields as well as the \texttt{indextitle} + field}, +} + +@article{article:baez-lauda_higher=algebr=v_2004, + title = {Higher-Dimensional Algebra {V}: 2-Groups}, + author = {Baez, John C. and Lauda, Aaron D.}, + volume = {12}, + pages = {423--491}, + journaltitle = {Theory and Applications of Categories}, + date = {2004}, + version = {3}, + eprint = {math/0307200v3}, + eprinttype = {arxiv}, + langid = {english}, + keywords = {math}, + langidopts = {variant=american}, + annotation = {An \texttt{article} with \texttt{eprint} and \texttt{ + eprinttype} fields. 
Note that the arXiv reference is + transformed into a clickable link if \texttt{hyperref} support + has been enabled. Compare \texttt{baez\slash online}, which is + the same item given as an \texttt{online} entry}, +} + +@article{article:bertram-wentworth_gromov=invari=holomo_1996, + title = {Gromov invariants for holomorphic maps on {Riemann} surfaces}, + shorttitle = {Gromov invariants}, + author = {Bertram, Aaron and Wentworth, Richard}, + volume = {9}, + number = {2}, + pages = {529--571}, + journaltitle = jams, + date = {1996}, + langid = {english}, + langidopts = {variant=american}, + annotation = {An \texttt{article} entry with a \texttt{volume} and a \texttt + {number} field}, +} + +@article{article:doody_heming=style=jakes_1974, + title = {Hemingway's Style and {Jake's} Narration}, + author = {Doody, Terrence}, + year = {1974}, + journal = {The Journal of Narrative Technique}, + volume = {4}, + number = {3}, + pages = {212--225}, + langid = {english}, + langidopts = {variant=american}, + related = {matuz:article:doody_heming=style=jakes_1974}, + relatedstring = {\autocap{e}xcerpt in}, + annotation = {An \texttt{article} entry cited as an excerpt from a \texttt{ + collection} entry. Note the format of the \texttt{related} and + \texttt{relatedstring} fields}, +} + +@article{article:gillies_herder=prepar=goethe_1933, + title = {Herder and the Preparation of {Goethe's} Idea of World Literature}, + author = {Gillies, Alexander}, + series = {newseries}, + volume = {9}, + pages = {46--67}, + journaltitle = {Publications of the English Goethe Society}, + date = {1933}, + langid = {english}, + langidopts = {variant=british}, + annotation = {An \texttt{article} entry with a \texttt{series} and a \texttt + {volume} field. 
Note that format of the \texttt{series} field + in the database file}, +} + +@article{article:glashow_partia=symmet=weak_1961, + title = {Partial Symmetries of Weak Interactions}, + author = {Glashow, Sheldon}, + volume = {22}, + pages = {579--588}, + journaltitle = {Nucl.~Phys.}, + date = {1961}, +} + +@article{article:herrmann-ofele_carboc=carben=as_2006, + title = {A carbocyclic carbene as an efficient catalyst ligand for {C--C} + coupling reactions}, + author = {Herrmann, Wolfgang A. and {\"O}fele, Karl and Schneider, Sabine K. + and Herdtweck, Eberhardt and Hoffmann, Stephan D.}, + volume = {45}, + number = {23}, + pages = {3859--3862}, + journaltitle = anch-ie, + date = {2006}, + indextitle = {Carbocyclic carbene as an efficient catalyst, A}, +} + +@article{article:hostetler-wingate_alkane=gold=cluste_1998, + title = {Alkanethiolate gold cluster molecules with core diameters from 1.5 + to 5.2~{nm}}, + shorttitle = {Alkanethiolate gold cluster molecules}, + author = {Hostetler, Michael J. and Wingate, Julia E. and Zhong, Chuan-Jian + and Harris, Jay E. and Vachet, Richard W. and Clark, Michael R. and + Londono, J. David and Green, Stephen J. and Stokes, Jennifer J. and + Wignall, George D. and Glish, Gary L. and Porter, Marc D. and Evans + , Neal D. and Murray, Royce W.}, + volume = {14}, + number = {1}, + pages = {17--30}, + journaltitle = {Langmuir}, + date = {1998}, + subtitle = {Core and monolayer properties as a function of core size}, + langid = {english}, + langidopts = {variant=american}, + indextitle = {Alkanethiolate gold cluster molecules}, + annotation = {An \texttt{article} entry with \arabic{author} authors. By + default, long author and editor lists are automatically + truncated. This is configurable}, +} + +@article{article:kastenholz-hunenberger_comput=method=ionic_2006, + title = {Computation of methodology\hyphen independent ionic solvation free + energies from molecular simulations}, + author = {Kastenholz, M. A. 
and H{\"u}nenberger, Philippe H.}, + volume = {124}, + doi = {10.1063/1.2172593}, + journaltitle = jchph, + date = {2006}, + subtitle = {{I}. {The} electrostatic potential in molecular liquids}, + eid = {124106}, + langid = {english}, + langidopts = {variant=american}, + indextitle = {Computation of ionic solvation free energies}, + annotation = {An \texttt{article} entry with an \texttt{eid} and a \texttt{ + doi} field. Note that the \textsc{doi} is transformed into a + clickable link if \texttt{hyperref} support has been enabled}, + abstract = {The computation of \texttt{ionic} solvation free energies from atomistic + simulations is a surprisingly difficult problem that has found no + satisfactory solution for more than 15 years. The reason is that + the charging free energies evaluated from such simulations are + affected by very large errors. One of these is related to the + choice of a specific convention for summing up the contributions + of solvent charges to the electrostatic potential in the ionic + cavity, namely, on the basis of point charges within entire + solvent molecules (M scheme) or on the basis of individual point + charges (P scheme). The use of an inappropriate convention may + lead to a charge-independent offset in the calculated potential, + which depends on the details of the summation scheme, on the + quadrupole-moment trace of the solvent molecule, and on the + approximate form used to represent electrostatic interactions in + the system. However, whether the M or P scheme (if any) + represents the appropriate convention is still a matter of + on-going debate. The goal of the present article is to settle + this long-standing controversy by carefully analyzing (both + analytically and numerically) the properties of the electrostatic + potential in molecular liquids (and inside cavities within them). 
+ }, +} + +@article{article:sarfraz-razzak_techni=sectio=algori_2002, + title = {Technical section: {An} algorithm for automatic capturing of the + font outlines}, + author = {M. Sarfraz and M. F. A. Razzak}, + year = {2002}, + journal = {Computers and Graphics}, + volume = {26}, + number = {5}, + pages = {795--804}, + issn = {0097-8493}, + annotation = {An \texttt{article} entry with an \texttt{issn} field}, +} + +@article{article:reese_georgi=anglos=diplom_1958, + title = {Georgia in {Anglo-Spanish} Diplomacy, 1736--1739}, + author = {Reese, Trevor R.}, + series = {3}, + volume = {15}, + pages = {168--190}, + journaltitle = {William and Mary Quarterly}, + date = {1958}, + langid = {english}, + langidopts = {variant=american}, + annotation = {An \texttt{article} entry with a \texttt{series} and a \texttt + {volume} field. Note the format of the series. If the value of + the \texttt{series} field is an integer, this number is printed + as an ordinal and the string \enquote*{series} is appended + automatically}, +} + +@article{article:shore_twiceb=once=concei_1991, + title = {Twice-Born, Once Conceived}, + author = {Shore, Bradd}, + series = {newseries}, + volume = {93}, + number = {1}, + pages = {9--27}, + journaltitle = {American Anthropologist}, + date = {1991-03}, + subtitle = {Meaning Construction and Cultural Cognition}, + annotation = {An \texttt{article} entry with \texttt{series}, \texttt{volume + }, and \texttt{number} fields. 
Note the format of the \texttt{ + series} which is a localization key}, +} + +@article{article:sigfridsson-ryde_compar=method=derivi_1998, + title = {Comparison of methods for deriving atomic charges from the + electrostatic potential and moments}, + author = {Sigfridsson, Emma and Ryde, Ulf}, + volume = {19}, + number = {4}, + pages = {377--395}, + doi = {10.1002/(SICI)1096-987X(199803)19:4<377::AID-JCC1>3.0.CO;2-P}, + journaltitle = {Journal of Computational Chemistry}, + date = {1998}, + langid = {english}, + langidopts = {variant=american}, + indextitle = {Methods for deriving atomic charges}, + annotation = {An \texttt{article} entry with \texttt{volume}, \texttt{number + }, and \texttt{doi} fields. Note that the \textsc{doi} is + transformed into a clickable link if \texttt{hyperref} support + has been enabled}, + abstract = {Four methods for deriving partial atomic charges from the + quantum chemical electrostatic potential (CHELP, CHELPG, + Merz-Kollman, and RESP) have been compared and critically + evaluated. It is shown that charges strongly depend on how and + where the potential points are selected. Two alternative methods + are suggested to avoid the arbitrariness in the point-selection + schemes and van der Waals exclusion radii: CHELP-BOW, which also + estimates the charges from the electrostatic potential, but with + potential points that are Boltzmann-weighted after their + occurrence in actual simulations using the energy function of the + program in which the charges will be used, and CHELMO, which + estimates the charges directly from the electrostatic multipole + moments. 
Different criteria for the quality of the charges are + discussed.}, +} + +@article{article:spiegelberg_intent=intent=schola_1969, + title = {\mkbibquote{Intention} und \mkbibquote{Intentionalit{\"a}t} in der + Scholastik, bei Brentano und Husserl}, + shorttitle = {Intention und Intentionalit{\"a}t}, + author = {Spiegelberg, Herbert}, + volume = {29}, + pages = {189--216}, + journaltitle = {Studia Philosophica}, + date = {1969}, + langid = {german}, + sorttitle = {Intention und Intentionalitat in der Scholastik, bei Brentano + und Husserl}, + indexsorttitle = {Intention und Intentionalitat in der Scholastik, bei + Brentano und Husserl}, + annotation = {An \texttt{article} entry. Note the \texttt{sorttitle} and + \texttt{indexsorttitle} fields and the markup of the quotes in + the database file}, +} + +@article{article:springer_mediae=pilgri=routes_1950, + title = {Mediaeval Pilgrim Routes from {Scandinavia} to {Rome}}, + shorttitle = {Mediaeval Pilgrim Routes}, + author = {Springer, Otto}, + volume = {12}, + pages = {92--122}, + journaltitle = {Mediaeval Studies}, + date = {1950}, + langid = {english}, + langidopts = {variant=british}, + annotation = {A plain \texttt{article} entry}, +} + +@article{article:weinberg_model=lepton_1967, + title = {A Model of Leptons}, + author = {Weinberg, Steven}, + volume = {19}, + pages = {1264--1266}, + journaltitle = {Phys.~Rev.~Lett.}, + date = {1967}, +} + +@string{anch-ie = {Angew.~Chem. Int.~Ed.}} + +@string{cup = {Cambridge University Press}} + +@string{dtv = {Deutscher Taschenbuch-Verlag}} + +@string{hup = {Harvard University Press}} + +@string{jams = {J.~Amer. Math. Soc.}} + +@string{jchph = {J.~Chem. Phys.}} + +@string{jomch = {J.~Organomet. 
Chem.}} + +@string{pup = {Princeton University Press}} + +@incollection{incollection:westfahl_true=fronti, + title = {The True Frontier}, + author = {Westfahl, Gary}, + pages = {55--65}, + subtitle = {Confronting and Avoiding the Realities of Space in {American} + Science Fiction Films}, + crossref = {westfahl:frontier}, + langid = {english}, + langidopts = {variant=american}, + indextitle = {True Frontier, The}, + annotation = {A cross-referenced article from a \texttt{collection}. This is + an \texttt{incollection} entry with a \texttt{crossref} field. + Note the \texttt{subtitle} and \texttt{indextitle} fields}, +} + +@article{article:yoon-ryu_pallad=pincer=comple_2006, + title = {Palladium pincer complexes with reduced bond angle strain: + efficient catalysts for the {Heck} reaction}, + author = {Yoon, Myeong S. and Ryu, Dowook and Kim, Jeongryul and Ahn, Kyo + Han}, + volume = {25}, + number = {10}, + pages = {2409--2411}, + journaltitle = {Organometallics}, + date = {2006}, + indextitle = {Palladium pincer complexes}, +} diff --git a/tests/test-config.toml b/tests/test-config.toml index d3e42c5..8dd8014 100644 --- a/tests/test-config.toml +++ b/tests/test-config.toml @@ -61,10 +61,13 @@ custom_column = "series" # year_color = "135" [citekey_formatter] -fields = ["entrytype;;;;:", "author;2;;-;_", "title;3;6;_;_", "year", "publisher;;5;#;" ] +fields = ["shorthand;;;;+","entrytype;;;;:", "author;2;;-;_", "title;3;6;=;_", "year", "publisher;;5;#;" ] # fields = [ # CamelCase test # "author;2;;;", # "title;5;5;;", # "year" # ] case = "lowercase" +ascii_only = true +# ignored_words = ["the"] +# ignored_chars = ["?", "."] -- cgit v1.2.3 From c62b83e02359c24973344699116acc12b4a04108 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Tue, 14 Oct 2025 08:54:35 +0200 Subject: skip set and xdata entries by default --- src/bibiman/citekeys.rs | 7 +++++-- src/bibiman/citekeys/citekey_utils.rs | 5 +++++ src/config.rs | 10 ++++++++++ tests/biblatex-test-citekeys.bib | 4 ++-- 4 
files changed, 22 insertions(+), 4 deletions(-) (limited to 'src/config.rs') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 0cec28e..999c6cb 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -28,7 +28,7 @@ use owo_colors::OwoColorize; use serde::{Deserialize, Serialize}; use crate::{ - bibiman::citekeys::citekey_utils::{build_citekey, formatting_help}, + bibiman::citekeys::citekey_utils::{SKIPPED_ENTRIES, build_citekey, formatting_help}, config::{BibiConfig, IGNORED_SPECIAL_CHARS, IGNORED_WORDS}, }; @@ -167,6 +167,10 @@ impl CitekeyFormatting { pub fn do_formatting(&mut self, ignored_chars: &[char], ignored_words: &[String]) -> &mut Self { let mut old_new_keys: Vec<(String, String)> = Vec::new(); for entry in self.bib_entries.iter() { + // Skip specific entries + if SKIPPED_ENTRIES.contains(&entry.entry_type.to_string().to_lowercase().as_str()) { + continue; + } old_new_keys.push(( entry.key.clone(), build_citekey( @@ -181,7 +185,6 @@ impl CitekeyFormatting { } self.old_new_keys_map = old_new_keys; - // dbg!(&self.old_new_keys_map); self } diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs index 5f70224..58a8274 100644 --- a/src/bibiman/citekeys/citekey_utils.rs +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -15,6 +15,8 @@ // along with this program. If not, see . 
///// +use std::sync::LazyLock; + use biblatex::{ChunksExt, Entry, Type}; use deunicode::deunicode; use indoc::formatdoc; @@ -25,6 +27,8 @@ use owo_colors::{ use crate::bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully}; +pub(super) const SKIPPED_ENTRIES: [&str; 2] = ["set", "xdata"]; + pub(super) fn formatting_help() { let help = vec![ formatdoc!( @@ -200,6 +204,7 @@ pub(super) fn build_citekey( match case { Some(CitekeyCase::Lower) => new_citekey.to_lowercase(), Some(CitekeyCase::Upper) => new_citekey.to_uppercase(), + // otherwise skip, since camelcase is processed in char loop _ => new_citekey, } } diff --git a/src/config.rs b/src/config.rs index 7c1a0f8..b8d8b45 100644 --- a/src/config.rs +++ b/src/config.rs @@ -52,6 +52,10 @@ pub static IGNORED_WORDS: LazyLock> = LazyLock::new(|| { String::from("at"), String::from("to"), String::from("and"), + String::from("him"), + String::from("her"), + String::from("his"), + String::from("hers"), String::from("der"), String::from("die"), String::from("das"), @@ -63,6 +67,12 @@ pub static IGNORED_WORDS: LazyLock> = LazyLock::new(|| { String::from("und"), String::from("für"), String::from("vor"), + String::from("er"), + String::from("sie"), + String::from("es"), + String::from("ihm"), + String::from("ihr"), + String::from("ihnen"), ] }); diff --git a/tests/biblatex-test-citekeys.bib b/tests/biblatex-test-citekeys.bib index 9767f97..34c2f33 100644 --- a/tests/biblatex-test-citekeys.bib +++ b/tests/biblatex-test-citekeys.bib @@ -1,9 +1,9 @@ -@set{set, +@set{SET, entryset = {article:herrmann-ofele_carboc=carben=as_2006,article:aksin-turkmen_effect=immobi=on_2006,article:yoon-ryu_pallad=pincer=comple_2006}, annotation = {A \texttt{set} with three members.}, } -@set{set, +@set{stdmodel, entryset = {article:glashow_partia=symmet=weak_1961,article:weinberg_model=lepton_1967,salam}, annotation = {A \texttt{set} with three members discussing the standard model of particle physics.}, -- cgit v1.2.3 From 
2dc231247757a9a80b1925ed215f53f54eececa5 Mon Sep 17 00:00:00 2001 From: lukeflo Date: Wed, 15 Oct 2025 07:28:20 +0200 Subject: fix tests, remove unneeded imports, add description --- src/bibiman/citekeys.rs | 6 +++--- src/bibiman/citekeys/citekey_utils.rs | 3 --- src/config.rs | 1 + 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'src/config.rs') diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index 8f70ab0..fdeed14 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -241,9 +241,9 @@ impl<'a> CitekeyFormatting<'a> { /// That will prevent the replacement longer key parts that equal a full shorter /// key. /// - /// You are **very encouraged** to call this method before `update_file()` to - /// prevent replacing citekeys partly which afterwards wont match the pattern - /// anymore. + /// You are **very encouraged** to call this method before `update_file()` + /// or `update_notes_pdfs` to prevent replacing citekeys partly which + /// afterwards wont match the pattern anymore. pub fn rev_sort_new_keys_by_len(mut self) -> Self { self.old_new_keys_map .sort_by(|a, b| b.0.len().cmp(&a.0.len())); diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs index b8f5600..773a2d2 100644 --- a/src/bibiman/citekeys/citekey_utils.rs +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -15,10 +15,7 @@ // along with this program. If not, see . ///// -use std::sync::LazyLock; - use biblatex::{ChunksExt, Entry, Type}; -use deunicode::deunicode; use indoc::formatdoc; use owo_colors::{ OwoColorize, diff --git a/src/config.rs b/src/config.rs index b8d8b45..47e145c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -501,6 +501,7 @@ mod tests { year_color = "135" [citekey_formatter] + ascii_only = true "#, )?; -- cgit v1.2.3