From 67afd67d4d51a00079269d431a7058fc50750886 Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Tue, 7 Oct 2025 15:05:47 +0200
Subject: implement basic citekey formatting:

* Reads patterns and parses them.

TODO:

* **Fully** sanitize LaTeX macros
* Preprocess complex and regularly used fields like `author`
* Write changes to original bib file
---
 src/bibiman.rs           |   4 +-
 src/bibiman/bibisetup.rs |  10 ++-
 src/bibiman/citekeys.rs  | 167 +++++++++++++++++++++++++++++++++++++++++++++++
 src/config.rs            |   8 +++
 4 files changed, 186 insertions(+), 3 deletions(-)
 create mode 100644 src/bibiman/citekeys.rs

(limited to 'src')
diff --git a/src/bibiman.rs b/src/bibiman.rs
index c423ce1..3158d73 100644
--- a/src/bibiman.rs
+++ b/src/bibiman.rs
@@ -40,6 +40,7 @@ use std::result::Result::Ok;
 use tui_input::Input;
 
 pub mod bibisetup;
+pub mod citekeys;
 pub mod entries;
 pub mod keywords;
 pub mod search;
@@ -88,13 +89,14 @@ pub struct Bibiman {
 }
 
 impl Bibiman {
-    // Constructs a new instance of [`App`].
+    /// Constructs a new instance of [`Bibiman`].
     pub fn new(args: &mut CLIArgs, cfg: &mut BibiConfig) -> Result<Self> {
         let mut main_bibfiles: Vec<PathBuf> = args.pos_args.clone();
         if cfg.general.bibfiles.is_some() {
             main_bibfiles.append(cfg.general.bibfiles.as_mut().unwrap())
         };
         let main_bibfiles = cliargs::parse_files(main_bibfiles);
+        // TODO: insert workflow for formatting citekeys
         let main_biblio = BibiSetup::new(&main_bibfiles, cfg);
         let tag_list = TagList::new(main_biblio.keyword_list.clone());
         let search_struct = BibiSearch::default();
diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs
index b3f788c..a83a507 100644
--- a/src/bibiman/bibisetup.rs
+++ b/src/bibiman/bibisetup.rs
@@ -246,8 +246,14 @@ impl BibiData {
 }
 
 impl BibiSetup {
+    /// Setup the TUI:
+    /// * Getting files
+    /// * Parse files into `biblatex::Bibliography` struct
+    /// * If wanted, format citekeys
+    /// * Get citekey vector
+    /// * Collect all keywords
+    /// * Build the entry list to be displayed
     pub fn new(main_bibfiles: &[PathBuf], cfg: &BibiConfig) -> Self {
-        // TODO: Needs check for config file path as soon as config file is impl
         Self::check_files(main_bibfiles);
         let bibfilestring = Self::bibfiles_to_string(main_bibfiles);
         let bibliography = biblatex::Bibliography::parse(&bibfilestring).unwrap();
@@ -264,7 +270,7 @@ impl BibiSetup {
         }
     }
 
-    // Check which file format the passed file has
+    /// Check which file format the passed file has
     fn check_files(main_bibfiles: &[PathBuf]) {
         if main_bibfiles.is_empty() {
             println!(
diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
new file mode 100644
index 0000000..4c36e80
--- /dev/null
+++ b/src/bibiman/citekeys.rs
@@ -0,0 +1,167 @@
+use biblatex::Bibliography;
+use color_eyre::eyre::eyre;
+use owo_colors::OwoColorize;
+
+use crate::config::BibiConfig;
+
+#[derive(Debug, Default, Clone)]
+pub(crate) struct CitekeyFormatting {
+    bib_entries: Bibliography,
+    fields: Vec<String>,
+}
+
+impl CitekeyFormatting {
+    /// Start Citekey formatting with building a new instance of `CitekeyFormatting`
+    /// Formatting is processed file by file, because `bibiman` can handle
+    /// multi-file setups.
+    /// The `Bibliography` inserted will be edited in place with the new citekeys.
+    /// Thus, in the end the `bib_entries` field will hold the updated `Bibliography`
+    pub fn new(cfg: &BibiConfig, bib_entries: Bibliography) -> color_eyre::Result<Self> {
+        let fields = cfg.citekey_formatter.fields.clone();
+        if fields.is_empty() {
+            return Err(eyre!(
+                "To format all citekeys, you need to provide {} values in the config file",
+                "fields".bold()
+            ));
+        }
+        Ok(Self {
+            bib_entries,
+            fields,
+        })
+    }
+
+    pub fn do_formatting(&mut self) {
+        for entry in self.bib_entries.iter_mut() {
+            let mut new_citekey = String::new();
+            for pattern in self.fields.iter() {
+                let (field, word_count, char_count, inner_delimiter, trailing_delimiter) =
+                    split_formatting_pat(pattern);
+                let formatted_field_str = {
+                    let mut formatted_str = String::new();
+                    let field = entry.get_as::<String>(field).expect(&format!(
+                        "Couldn't find field {}",
+                        field.bold().bright_red()
+                    ));
+                    let mut split_field = field.split_whitespace();
+                    let mut words_passed = 0;
+                    loop {
+                        if let Some(field_slice) = split_field.next() {
+                            formatted_str = formatted_str + format_word(field_slice, char_count);
+                            words_passed += 1;
+                            if word_count.is_some_and(|count| count == words_passed) {
+                                formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
+                                break;
+                            } else {
+                                formatted_str = formatted_str + inner_delimiter.unwrap_or("")
+                            }
+                        } else {
+                            formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
+                            break;
+                        };
+                    }
+                    formatted_str
+                };
+                new_citekey = new_citekey + &formatted_field_str;
+            }
+            entry.key = new_citekey;
+        }
+    }
+}
+
+fn preformat_field() {}
+
+/// Cut of word at char count index if its set
+fn format_word(word: &str, count: Option<usize>) -> &str {
+    // `count` is a number of chars, not bytes: look up the byte offset of the
+    // char at that index so the slice always ends on a char boundary (plain
+    // `&word[..count]` panics or cuts short on multi-byte UTF-8 input).
+    match count.and_then(|len| word.char_indices().nth(len)) {
+        Some((cut, _)) => &word[..cut],
+        None => word,
+    }
+}
+
+/// Split a formatting pattern of kind
+/// `<field>;<word count>;<char count>;<inside delimiter>;<trailing delimiter>`,
+/// e.g.: `title;3;3;_;:` will give `("title", Some(3), Some(3), Some("_"), Some(":"))`
+fn split_formatting_pat(
+    pattern: &str,
+) -> (
+    &str,
+    Option<usize>,
+    Option<usize>,
+    Option<&str>,
+    Option<&str>,
+) {
+    let mut splits = pattern.split(';');
+    (
+        splits
+            .next()
+            .expect("Need field value for formatting citekey"),
+        if let Some(next) = splits.next()
+            && next.len() > 0
+        {
+            next.parse::<usize>().ok()
+        } else {
+            None
+        },
+        if let Some(next) = splits.next()
+            && next.len() > 0
+        {
+            next.parse::<usize>().ok()
+        } else {
+            None
+        },
+        splits.next(),
+        splits.next(),
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    use biblatex::Bibliography;
+    use itertools::Itertools;
+
+    use crate::bibiman::citekeys::{CitekeyFormatting, split_formatting_pat};
+
+    #[test]
+    fn split_citekey_pattern() {
+        let pattern = "title;3;5;_;_";
+
+        assert_eq!(
+            split_formatting_pat(pattern),
+            ("title", Some(3), Some(5), Some("_"), Some("_"))
+        );
+
+        let pattern = "year";
+
+        assert_eq!(
+            split_formatting_pat(pattern),
+            ("year", None, None, None, None)
+        );
+
+        let pattern = "author;1;;;_";
+        assert_eq!(
+            split_formatting_pat(pattern),
+            ("author", Some(1), None, Some(""), Some("_"))
+        );
+    }
+
+    #[test]
+    fn format_citekey_test() {
+        let src = r"@book{tolkien1937, author = {Tolkien}, title = {\enquote{Lord} of the \textbf{Rings}}, year = {1937}}";
+        let bibliography = Bibliography::parse(src).unwrap();
+        let mut formatting_struct = CitekeyFormatting {
+            bib_entries: bibliography,
+            fields: vec![
+                "author;1;;-;_".into(),
+                "title;3;3;_;_".into(),
+                "year".into(),
+            ],
+        };
+        formatting_struct.do_formatting();
+        let keys = formatting_struct.bib_entries.keys().collect_vec();
+        assert_eq!(keys[0], "Tolkien_Lor_of_the_1937");
+        assert_eq!(keys[0].to_lowercase(), "tolkien_lor_of_the_1937");
+    }
+}
diff --git a/src/config.rs b/src/config.rs
index 00a35b7..78cfef9 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -102,6 +102,7 @@ const DEFAULT_CONFIG: &str = r##"
 pub struct BibiConfig {
     pub general: General,
     pub colors: Colors,
+    pub citekey_formatter: CitekeyFormatter,
 }
 
 /// Substruct [general] in config.toml
@@ -143,6 +144,11 @@ pub struct Colors {
     pub year_color: Color,
 }
 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub struct CitekeyFormatter {
+    pub fields: Vec<String>,
+}
+
 impl Default for BibiConfig {
     fn default() -> Self {
         Self {
@@ -161,6 +167,7 @@ impl Default for BibiConfig {
                 custom_column: CustomField::Pubtype,
             },
             colors: Self::dark_colors(),
+            citekey_formatter: CitekeyFormatter { fields: Vec::new() },
         }
     }
 }
@@ -187,6 +194,7 @@ impl BibiConfig {
             } else {
                 Self::dark_colors()
             },
+            citekey_formatter: CitekeyFormatter { fields: Vec::new() },
         }
     }
 
-- 
cgit v1.2.3


From a07359a9a1da0c06c040f77158be31b3883b33ac Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Wed, 8 Oct 2025 13:49:06 +0200
Subject: refine matching and preformatting of fields for citekey formatting;
 add case field and enum

---
 Cross.toml                                 |   6 ++
 src/bibiman/citekeys.rs                    | 136 ++++++++++++++++++++++++++---
 src/bibiman/sanitize.rs                    |  10 ++-
 src/bibiman/sanitize/optimized_sanitize.rs |  28 +++++-
 src/config.rs                              |  24 +++--
 5 files changed, 177 insertions(+), 27 deletions(-)

(limited to 'src')

diff --git a/Cross.toml b/Cross.toml
index e7cd27b..6140bf2 100644
--- a/Cross.toml
+++ b/Cross.toml
@@ -9,3 +9,9 @@ pre-build = [
     "dpkg --add-architecture $CROSS_DEB_ARCH",
     "apt-get update && apt-get install --assume-yes libssl-dev:$CROSS_DEB_ARCH",
 ]
+
+[target.x86_64-unknown-freebsd]
+# pre-build = [
+#     "dpkg --add-architecture $CROSS_DEB_ARCH",
+#     "apt-get update && apt-get install --assume-yes libssl-dev:$CROSS_DEB_ARCH",
+# ]
diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index 4c36e80..a304e92 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -1,13 +1,40 @@
-use biblatex::Bibliography;
+// bibiman - a TUI for managing BibLaTeX databases
+// Copyright (C) 2025  lukeflo
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+/////
+
+use biblatex::{Bibliography, ChunksExt, Entry, Type};
 use color_eyre::eyre::eyre;
 use owo_colors::OwoColorize;
+use serde::{Deserialize, Serialize};
+
+use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig};
 
-use crate::config::BibiConfig;
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub(crate) enum CitekeyCase {
+    #[serde(alias = "uppercase", alias = "upper")]
+    Upper,
+    #[serde(alias = "lowercase", alias = "lower")]
+    Lower,
+}
 
 #[derive(Debug, Default, Clone)]
 pub(crate) struct CitekeyFormatting {
     bib_entries: Bibliography,
     fields: Vec<String>,
+    case: Option<CitekeyCase>,
 }
 
 impl CitekeyFormatting {
@@ -27,6 +54,7 @@ impl CitekeyFormatting {
         Ok(Self {
             bib_entries,
             fields,
+            case: cfg.citekey_formatter.case.clone(),
         })
     }
 
@@ -38,17 +66,36 @@ impl CitekeyFormatting {
                     split_formatting_pat(pattern);
                 let formatted_field_str = {
                     let mut formatted_str = String::new();
-                    let field = entry.get_as::<String>(field).expect(&format!(
-                        "Couldn't find field {}",
-                        field.bold().bright_red()
-                    ));
+                    let field = preformat_field(field, entry);
+                    // let field = if let Ok(val) = entry.get_as::<String>(field) {
+                    //     val
+                    // } else {
+                    //     eprintln!(
+                    //         "Unable to get field {} for entry {}",
+                    //         field.bright_red(),
+                    //         &entry.key.bold()
+                    //     );
+                    //     continue;
+                    // };
+                    // let field = entry.get_as::<String>(field).expect(&format!(
+                    //     "Couldn't find field {}",
+                    //     field.bold().bright_red()
+                    // ));
                     let mut split_field = field.split_whitespace();
                     let mut words_passed = 0;
+                    let word_count = if let Some(val) = word_count {
+                        val
+                    } else {
+                        field.split_whitespace().count()
+                        // split_field.size_hint().0 + 1
+                    };
+                    dbg!(word_count);
                     loop {
                         if let Some(field_slice) = split_field.next() {
                             formatted_str = formatted_str + format_word(field_slice, char_count);
                             words_passed += 1;
-                            if word_count.is_some_and(|count| count == words_passed) {
+                            // if word_count.is_some_and(|count| count == words_passed) {
+                            if word_count == words_passed {
                                 formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
                                 break;
                             } else {
@@ -68,7 +115,51 @@ impl CitekeyFormatting {
     }
 }
 
-fn preformat_field() {}
+/// Preformat some fields which are very common to be used in citekeys
+fn preformat_field(field: &str, entry: &mut Entry) -> String {
+    match field {
+        "title" => {
+            sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("NA".into()))
+        }
+        "author" => {
+            if let Ok(authors) = entry.author() {
+                let mut last_names = String::new();
+                for a in authors.iter() {
+                    last_names = last_names + &a.name + " ";
+                }
+                dbg!(&last_names);
+                last_names
+            } else {
+                "NA".to_string()
+            }
+        }
+        "year" => {
+            if let Ok(date) = entry.date() {
+                date.to_chunks().format_verbatim()[..4].to_string()
+            } else {
+                entry.get_as::<String>(field).unwrap_or("NA".into())
+            }
+        }
+        "subtitle" => {
+            sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("NA".into()))
+        }
+        "editor" => {
+            if let Ok(editors) = entry.editors() {
+                let mut last_names = String::new();
+                for editortypes in editors.iter() {
+                    for e in editortypes.0.iter() {
+                        last_names = last_names + &e.name + " ";
+                    }
+                }
+                last_names
+            } else {
+                "NA".to_string()
+            }
+        }
+        "pubtype" | "entrytype" => entry.entry_type.to_string(),
+        _ => entry.get_as::<String>(field).unwrap_or("Empty".into()),
+    }
+}
 
 /// Cut of word at char count index if its set
 fn format_word(word: &str, count: Option<usize>) -> &str {
@@ -122,7 +213,7 @@ mod tests {
     use biblatex::Bibliography;
     use itertools::Itertools;
 
-    use crate::bibiman::citekeys::{CitekeyFormatting, split_formatting_pat};
+    use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting, split_formatting_pat};
 
     #[test]
     fn split_citekey_pattern() {
@@ -149,19 +240,36 @@ mod tests {
 
     #[test]
     fn format_citekey_test() {
-        let src = r"@book{tolkien1937, author = {Tolkien}, title = {\enquote{Lord} of the \textbf{Rings}}, year = {1937}}";
+        let src = r"
+        @book{bhambra_colonialism_social_theory_2021,
+            title         = {Colonialism and \textbf{Modern Social Theory}},
+            author        = {Bhambra, Gurminder K. and Holmwood, John},
+            location      = {Cambridge and Medford},
+            publisher     = {Polity Press},
+            date          = {2021},
+        }
+        ";
         let bibliography = Bibliography::parse(src).unwrap();
         let mut formatting_struct = CitekeyFormatting {
             bib_entries: bibliography,
             fields: vec![
-                "author;1;;-;_".into(),
-                "title;3;3;_;_".into(),
+                "entrytype;;;;:".into(),
+                "author;;;-;_".into(),
+                "title;4;3;_;_".into(),
+                "location;;4;:;_".into(),
                 "year".into(),
             ],
+            case: None,
         };
         formatting_struct.do_formatting();
         let keys = formatting_struct.bib_entries.keys().collect_vec();
-        assert_eq!(keys[0], "Tolkien_Lor_of_the_1937");
-        assert_eq!(keys[0].to_lowercase(), "tolkien_lor_of_the_1937");
+        assert_eq!(
+            keys[0],
+            "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021"
+        );
+        assert_eq!(
+            keys[0].to_lowercase(),
+            "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021"
+        );
     }
 }
diff --git a/src/bibiman/sanitize.rs b/src/bibiman/sanitize.rs
index 9ccf4c4..8c1cc43 100644
--- a/src/bibiman/sanitize.rs
+++ b/src/bibiman/sanitize.rs
@@ -26,12 +26,12 @@ use optimized_sanitize::optimized_sanitize;
 macro_rules! optimized_sanitize_bibidata {
     ($bibidata:expr) => {
         SanitizedBibiData {
-            title: optimized_sanitize(&$bibidata.title),
+            title: optimized_sanitize(false, &$bibidata.title),
             subtitle: match &$bibidata.subtitle {
                 None => None,
-                Some(subtitle) => Some(optimized_sanitize(subtitle)),
+                Some(subtitle) => Some(optimized_sanitize(false, subtitle)),
             },
-            abstract_text: optimized_sanitize(&$bibidata.abstract_text),
+            abstract_text: optimized_sanitize(false, &$bibidata.abstract_text),
         }
     };
 }
@@ -41,3 +41,7 @@ macro_rules! optimized_sanitize_bibidata {
 pub fn sanitize_one_bibidata(bibidata: &BibiData) -> SanitizedBibiData {
     optimized_sanitize_bibidata!(bibidata)
 }
+
+pub fn sanitize_single_string_fully(input: &str) -> String {
+    optimized_sanitize(true, input)
+}
diff --git a/src/bibiman/sanitize/optimized_sanitize.rs b/src/bibiman/sanitize/optimized_sanitize.rs
index 336cc56..dff4d32 100644
--- a/src/bibiman/sanitize/optimized_sanitize.rs
+++ b/src/bibiman/sanitize/optimized_sanitize.rs
@@ -31,6 +31,17 @@ static LOOKUP: phf::Map<&'static str, (&'static str, Option<&'static str>)> = ph
     r"\textsc" => ("", Some("")),
 };
 
+static LOOKUP_CLEAR_ALL: phf::Map<&'static str, (&'static str, Option<&'static str>)> = phf_map! {
+    r"\mkbibquote" => ("", Some("")),
+    r"\enquote*" => ("", Some("")),
+    r"\enquote" => ("", Some("")),
+    r"\hyphen" => ("", None),
+    r"\textbf" => ("", Some("")),
+    r"\textit" => ("", Some("")),
+    r"\texttt" => ("", Some("")),
+    r"\textsc" => ("", Some("")),
+};
+
 #[derive(Logos, Debug)]
 enum Token {
     #[token("{")]
@@ -43,7 +54,12 @@ enum Token {
     ForcedSpace,
 }
 
-pub fn optimized_sanitize(input_text: &str) -> String {
+pub fn optimized_sanitize(clear_all: bool, input_text: &str) -> String {
+    let lookup = if clear_all {
+        &LOOKUP_CLEAR_ALL
+    } else {
+        &LOOKUP
+    };
     let mut char_counter: usize = 0;
     let mut contains_macro: bool = false;
     for char in input_text.chars() {
@@ -87,7 +103,7 @@ pub fn optimized_sanitize(input_text: &str) -> String {
                     }
                     Token::LaTeXMacro => {
                         let texmacro = lex.slice();
-                        if let Some(x) = LOOKUP.get(&texmacro.trim_end()) {
+                        if let Some(x) = lookup.get(&texmacro.trim_end()) {
                             if let Some(end) = x.1 {
                                 bc_up = true;
                                 counter_actions.insert(bracket_counter + 1, end);
@@ -115,11 +131,17 @@ mod tests {
     #[test]
     fn check_sanitization() {
         let result = optimized_sanitize(
+            false,
             r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}",
         );
         assert_eq!(
             "\"Intention\" und \"Intentionen \"sind\" - bibquote-.\"",
             result
-        )
+        );
+        let result = optimized_sanitize(
+            true,
+            r"\mkbibquote {Intention} und \mkbibquote{Intentionen \mkbibquote{sind} \hyphen\ bibquote\hyphen .}",
+        );
+        assert_eq!("Intention und Intentionen sind  bibquote.", result)
     }
 }
diff --git a/src/config.rs b/src/config.rs
index 78cfef9..8a333e4 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -16,21 +16,24 @@
 /////
 
 use std::{
-    fs::{create_dir_all, File},
-    io::{stdin, Write},
+    fs::{File, create_dir_all},
+    io::{Write, stdin},
     path::PathBuf,
     str::FromStr,
 };
 
 use color_eyre::{eyre::Result, owo_colors::OwoColorize};
 use figment::{
-    providers::{Format, Serialized, Toml},
     Figment,
+    providers::{Format, Serialized, Toml},
 };
 use ratatui::style::Color;
 use serde::{Deserialize, Serialize};
 
-use crate::{bibiman::bibisetup::CustomField, cliargs::CLIArgs};
+use crate::{
+    bibiman::{bibisetup::CustomField, citekeys::CitekeyCase},
+    cliargs::CLIArgs,
+};
 
 const DEFAULT_CONFIG: &str = r##"
 # [general]
@@ -147,6 +150,7 @@ pub struct Colors {
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 pub struct CitekeyFormatter {
     pub fields: Vec<String>,
+    pub case: Option<CitekeyCase>,
 }
 
 impl Default for BibiConfig {
@@ -167,7 +171,10 @@ impl Default for BibiConfig {
                 custom_column: CustomField::Pubtype,
             },
             colors: Self::dark_colors(),
-            citekey_formatter: CitekeyFormatter { fields: Vec::new() },
+            citekey_formatter: CitekeyFormatter {
+                fields: Vec::new(),
+                case: None,
+            },
         }
     }
 }
@@ -194,7 +201,10 @@ impl BibiConfig {
             } else {
                 Self::dark_colors()
             },
-            citekey_formatter: CitekeyFormatter { fields: Vec::new() },
+            citekey_formatter: CitekeyFormatter {
+                fields: Vec::new(),
+                case: None,
+            },
         }
     }
 
@@ -352,8 +362,8 @@ fn select_opener() -> String {
 #[cfg(test)]
 mod tests {
     use figment::{
-        providers::{Format, Toml},
         Figment,
+        providers::{Format, Toml},
     };
 
     use super::BibiConfig;
-- 
cgit v1.2.3


From 8b858f92da69cfb8fa43ec861cda46eeb6ef4bbe Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Wed, 8 Oct 2025 14:39:46 +0200
Subject: case parsing from config, needs to be implemented for citekey struct

---
 src/bibiman/citekeys.rs | 95 +++++++++++++++++++++++--------------------------
 src/config.rs           |  8 +++--
 2 files changed, 49 insertions(+), 54 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index a304e92..118ae3e 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -23,7 +23,7 @@ use serde::{Deserialize, Serialize};
 use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig};
 
 #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub(crate) enum CitekeyCase {
+pub enum CitekeyCase {
     #[serde(alias = "uppercase", alias = "upper")]
     Upper,
     #[serde(alias = "lowercase", alias = "lower")]
@@ -44,7 +44,11 @@ impl CitekeyFormatting {
     /// The `Bibliography` inserted will be edited in place with the new citekeys.
     /// Thus, in the end the `bib_entries` field will hold the updated `Bibliography`
     pub fn new(cfg: &BibiConfig, bib_entries: Bibliography) -> color_eyre::Result<Self> {
-        let fields = cfg.citekey_formatter.fields.clone();
+        let fields = cfg
+            .citekey_formatter
+            .fields
+            .clone()
+            .expect("Need to define fields in config to format citekeys");
         if fields.is_empty() {
             return Err(eyre!(
                 "To format all citekeys, you need to provide {} values in the config file",
@@ -58,65 +62,54 @@ impl CitekeyFormatting {
         })
     }
 
+    /// Process the actual formatting. The citekey of every entry will be updated.
     pub fn do_formatting(&mut self) {
         for entry in self.bib_entries.iter_mut() {
-            let mut new_citekey = String::new();
-            for pattern in self.fields.iter() {
-                let (field, word_count, char_count, inner_delimiter, trailing_delimiter) =
-                    split_formatting_pat(pattern);
-                let formatted_field_str = {
-                    let mut formatted_str = String::new();
-                    let field = preformat_field(field, entry);
-                    // let field = if let Ok(val) = entry.get_as::<String>(field) {
-                    //     val
-                    // } else {
-                    //     eprintln!(
-                    //         "Unable to get field {} for entry {}",
-                    //         field.bright_red(),
-                    //         &entry.key.bold()
-                    //     );
-                    //     continue;
-                    // };
-                    // let field = entry.get_as::<String>(field).expect(&format!(
-                    //     "Couldn't find field {}",
-                    //     field.bold().bright_red()
-                    // ));
-                    let mut split_field = field.split_whitespace();
-                    let mut words_passed = 0;
-                    let word_count = if let Some(val) = word_count {
-                        val
+            entry.key = build_citekey(entry, &self.fields);
+        }
+    }
+}
+
+/// Build the citekey from the patterns defined in the config file
+fn build_citekey(entry: &Entry, pattern_fields: &[String]) -> String {
+    let mut new_citekey = String::new();
+    for pattern in pattern_fields.iter() {
+        let (field, word_count, char_count, inner_delimiter, trailing_delimiter) =
+            split_formatting_pat(pattern);
+        let formatted_field_str = {
+            let mut formatted_str = String::new();
+            let field = preformat_field(field, entry);
+            let mut split_field = field.split_whitespace();
+            let mut words_passed = 0;
+            let word_count = if let Some(val) = word_count {
+                val
+            } else {
+                field.split_whitespace().count()
+            };
+            loop {
+                if let Some(field_slice) = split_field.next() {
+                    formatted_str = formatted_str + format_word(field_slice, char_count);
+                    words_passed += 1;
+                    if word_count == words_passed {
+                        formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
+                        break;
                     } else {
-                        field.split_whitespace().count()
-                        // split_field.size_hint().0 + 1
-                    };
-                    dbg!(word_count);
-                    loop {
-                        if let Some(field_slice) = split_field.next() {
-                            formatted_str = formatted_str + format_word(field_slice, char_count);
-                            words_passed += 1;
-                            // if word_count.is_some_and(|count| count == words_passed) {
-                            if word_count == words_passed {
-                                formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
-                                break;
-                            } else {
-                                formatted_str = formatted_str + inner_delimiter.unwrap_or("")
-                            }
-                        } else {
-                            formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
-                            break;
-                        };
+                        formatted_str = formatted_str + inner_delimiter.unwrap_or("")
                     }
-                    formatted_str
+                } else {
+                    formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
+                    break;
                 };
-                new_citekey = new_citekey + &formatted_field_str;
             }
-            entry.key = new_citekey;
-        }
+            formatted_str
+        };
+        new_citekey = new_citekey + &formatted_field_str;
     }
+    new_citekey
 }
 
 /// Preformat some fields which are very common to be used in citekeys
-fn preformat_field(field: &str, entry: &mut Entry) -> String {
+fn preformat_field(field: &str, entry: &Entry) -> String {
     match field {
         "title" => {
             sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("NA".into()))
diff --git a/src/config.rs b/src/config.rs
index 8a333e4..a5df61c 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -149,7 +149,7 @@ pub struct Colors {
 
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 pub struct CitekeyFormatter {
-    pub fields: Vec<String>,
+    pub fields: Option<Vec<String>>,
     pub case: Option<CitekeyCase>,
 }
 
@@ -172,7 +172,7 @@ impl Default for BibiConfig {
             },
             colors: Self::dark_colors(),
             citekey_formatter: CitekeyFormatter {
-                fields: Vec::new(),
+                fields: None,
                 case: None,
             },
         }
@@ -202,7 +202,7 @@ impl BibiConfig {
                 Self::dark_colors()
             },
             citekey_formatter: CitekeyFormatter {
-                fields: Vec::new(),
+                fields: None,
                 case: None,
             },
         }
@@ -400,6 +400,8 @@ mod tests {
                     author_color = "38"
                     title_color = "37"
                     year_color = "135"
+
+                    [citekey_formatter]
                 "#,
             )?;
 
-- 
cgit v1.2.3


From 952dc94b412ffcff26a59c37f3112079c78058ff Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Wed, 8 Oct 2025 22:30:46 +0200
Subject: use vector for old new key pairs

---
 src/bibiman/citekeys.rs | 64 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 59 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index 118ae3e..b389da2 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -15,6 +15,12 @@
 // along with this program.  If not, see <https://www.gnu.org/licenses/>.
 /////
 
+use std::{
+    fs::File,
+    io::Write,
+    path::{Path, PathBuf},
+};
+
 use biblatex::{Bibliography, ChunksExt, Entry, Type};
 use color_eyre::eyre::eyre;
 use owo_colors::OwoColorize;
@@ -32,9 +38,11 @@ pub enum CitekeyCase {
 
 #[derive(Debug, Default, Clone)]
 pub(crate) struct CitekeyFormatting {
+    bibfile_path: PathBuf,
     bib_entries: Bibliography,
     fields: Vec<String>,
     case: Option<CitekeyCase>,
+    old_new_keys_map: Vec<(String, String)>,
 }
 
 impl CitekeyFormatting {
@@ -43,7 +51,11 @@ impl CitekeyFormatting {
     /// multi-file setups.
     /// The `Bibliography` inserted will be edited in place with the new citekeys.
     /// Thus, in the end the `bib_entries` field will hold the updated `Bibliography`
-    pub fn new(cfg: &BibiConfig, bib_entries: Bibliography) -> color_eyre::Result<Self> {
+    pub fn new<P: AsRef<Path>>(
+        cfg: &BibiConfig,
+        path: P,
+        bib_entries: Bibliography,
+    ) -> color_eyre::Result<Self> {
         let fields = cfg
             .citekey_formatter
             .fields
@@ -56,22 +68,39 @@ impl CitekeyFormatting {
             ));
         }
         Ok(Self {
+            bibfile_path: path.as_ref().to_path_buf(),
             bib_entries,
             fields,
             case: cfg.citekey_formatter.case.clone(),
+            old_new_keys_map: Vec::new(),
         })
     }
 
     /// Process the actual formatting. The citekey of every entry will be updated.
     pub fn do_formatting(&mut self) {
+        let mut old_new_keys: Vec<(String, String)> = Vec::new();
         for entry in self.bib_entries.iter_mut() {
-            entry.key = build_citekey(entry, &self.fields);
+            old_new_keys.push((
+                entry.key.clone(),
+                build_citekey(entry, &self.fields, self.case.as_ref()),
+            ));
         }
+
+        self.old_new_keys_map = old_new_keys;
+    }
+
+    /// Write entries with updated citekeys to bibfile
+    pub fn update_file(&self) -> color_eyre::Result<()> {
+        let mut file = File::open(&self.bibfile_path)?;
+
+        file.write_all(self.bib_entries.to_biblatex_string().as_bytes())?;
+
+        Ok(())
     }
 }
 
 /// Build the citekey from the patterns defined in the config file
-fn build_citekey(entry: &Entry, pattern_fields: &[String]) -> String {
+fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String {
     let mut new_citekey = String::new();
     for pattern in pattern_fields.iter() {
         let (field, word_count, char_count, inner_delimiter, trailing_delimiter) =
@@ -105,7 +134,14 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String]) -> String {
         };
         new_citekey = new_citekey + &formatted_field_str;
     }
-    new_citekey
+    if let Some(case_format) = case {
+        match case_format {
+            CitekeyCase::Lower => new_citekey.to_lowercase(),
+            CitekeyCase::Upper => new_citekey.to_uppercase(),
+        }
+    } else {
+        new_citekey
+    }
 }
 
 /// Preformat some fields which are very common to be used in citekeys
@@ -203,6 +239,8 @@ fn split_formatting_pat(
 
 #[cfg(test)]
 mod tests {
+    use std::path::PathBuf;
+
     use biblatex::Bibliography;
     use itertools::Itertools;
 
@@ -244,6 +282,7 @@ mod tests {
         ";
         let bibliography = Bibliography::parse(src).unwrap();
         let mut formatting_struct = CitekeyFormatting {
+            bibfile_path: PathBuf::new(),
             bib_entries: bibliography,
             fields: vec![
                 "entrytype;;;;:".into(),
@@ -253,6 +292,7 @@ mod tests {
                 "year".into(),
             ],
             case: None,
+            old_new_keys_map: Vec::new(),
         };
         formatting_struct.do_formatting();
         let keys = formatting_struct.bib_entries.keys().collect_vec();
@@ -260,9 +300,23 @@ mod tests {
             keys[0],
             "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021"
         );
+        formatting_struct.case = Some(CitekeyCase::Lower);
+        formatting_struct.do_formatting();
+        let keys = formatting_struct.bib_entries.keys().collect_vec();
         assert_eq!(
-            keys[0].to_lowercase(),
+            keys[0],
             "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021"
         );
+        // let bib_string = formatting_struct.bib_entries.to_biblatex_string();
+        // let new_entry = r"
+        // @book{book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021,
+        //     title         = {Colonialism and \textbf{Modern Social Theory}},
+        //     author        = {Bhambra, Gurminder K. and Holmwood, John},
+        //     location      = {Cambridge and Medford},
+        //     publisher     = {Polity Press},
+        //     date          = {2021},
+        // }
+        // ";
+        // assert_eq!(new_entry, bib_string);
     }
 }
-- 
cgit v1.2.3


From 7266a14753ed5d572aeed584b66b07d1b9921ca7 Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Thu, 9 Oct 2025 14:28:55 +0200
Subject: rewrite cli parsing; need to implement format-citekeys cli parsing

---
 src/bibiman/citekeys.rs | 105 ++++++++++++++++++++++++++++++++++++------------
 src/cliargs.rs          |  52 +++++++++++++++++++-----
 src/main.rs             |  30 +-------------
 3 files changed, 124 insertions(+), 63 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index b389da2..b7995ac 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -16,7 +16,7 @@
 /////
 
 use std::{
-    fs::File,
+    fs::OpenOptions,
     io::Write,
     path::{Path, PathBuf},
 };
@@ -38,7 +38,9 @@ pub enum CitekeyCase {
 
 #[derive(Debug, Default, Clone)]
 pub(crate) struct CitekeyFormatting {
-    bibfile_path: PathBuf,
+    /// Bibfile whose keys are replaced. The optional second element defines a
+    /// different output file to write to; otherwise the original is overwritten.
+    bibfile_path: (PathBuf, Option<PathBuf>),
     bib_entries: Bibliography,
     fields: Vec<String>,
     case: Option<CitekeyCase>,
@@ -54,6 +56,7 @@ impl CitekeyFormatting {
     pub fn new<P: AsRef<Path>>(
         cfg: &BibiConfig,
         path: P,
+        target: Option<P>,
         bib_entries: Bibliography,
     ) -> color_eyre::Result<Self> {
         let fields = cfg
@@ -68,7 +71,10 @@ impl CitekeyFormatting {
             ));
         }
         Ok(Self {
-            bibfile_path: path.as_ref().to_path_buf(),
+            bibfile_path: (
+                path.as_ref().to_path_buf(),
+                target.map(|p| p.as_ref().to_path_buf()),
+            ),
             bib_entries,
             fields,
             case: cfg.citekey_formatter.case.clone(),
@@ -77,9 +83,9 @@ impl CitekeyFormatting {
     }
 
     /// Process the actual formatting. The citekey of every entry will be updated.
-    pub fn do_formatting(&mut self) {
+    pub fn do_formatting(&mut self) -> &mut Self {
         let mut old_new_keys: Vec<(String, String)> = Vec::new();
-        for entry in self.bib_entries.iter_mut() {
+        for entry in self.bib_entries.iter() {
             old_new_keys.push((
                 entry.key.clone(),
                 build_citekey(entry, &self.fields, self.case.as_ref()),
@@ -87,16 +93,47 @@ impl CitekeyFormatting {
         }
 
         self.old_new_keys_map = old_new_keys;
+
+        self
     }
 
     /// Write entries with updated citekeys to bibfile
     pub fn update_file(&self) -> color_eyre::Result<()> {
-        let mut file = File::open(&self.bibfile_path)?;
+        let source_file = self.bibfile_path.0.as_path();
+        let target_file = if let Some(path) = &self.bibfile_path.1 {
+            path
+        } else {
+            source_file
+        };
+        let mut content = std::fs::read_to_string(source_file)?;
 
-        file.write_all(self.bib_entries.to_biblatex_string().as_bytes())?;
+        for (old_key, new_key) in self.old_new_keys_map.iter() {
+            content = content.replace(old_key, new_key);
+        }
+
+        let mut new_file = OpenOptions::new()
+            .truncate(true)
+            .write(true)
+            .create(true)
+            .open(target_file)?;
+
+        new_file.write_all(content.as_bytes())?;
 
         Ok(())
     }
+
+    /// Sort the old/new citekey pairs by the length of the new key, longest first.
+    /// This is meant to keep a shorter key from being replaced inside a longer key
+    /// that contains it.
+    ///
+    /// You are **strongly encouraged** to call this method before `update_file()`
+    /// to avoid partially replaced citekeys. NOTE(review): `update_file()` matches
+    /// on the *old* keys, so ordering by old-key length may be safer; confirm.
+    pub fn rev_sort_new_keys_by_len(&mut self) -> &mut Self {
+        self.old_new_keys_map
+            .sort_by(|a, b| b.1.len().cmp(&a.1.len()));
+        self
+    }
 }
 
 /// Build the citekey from the patterns defined in the config file
@@ -272,6 +309,18 @@ mod tests {
     #[test]
     fn format_citekey_test() {
         let src = r"
+        @article{bos_latex_metadata_and_publishing_workflows_2023,
+            title         = {{LaTeX}, metadata, and publishing workflows},
+            author        = {Bos, Joppe W. and {McCurley}, Kevin S.},
+            year          = {2023},
+            month         = apr,
+            journal       = {arXiv},
+            number        = {{arXiv}:2301.08277},
+            doi           = {10.48550/arXiv.2301.08277},
+            url           = {http://arxiv.org/abs/2301.08277},
+            urldate       = {2023-08-22},
+            note          = {type: article},
+        }
         @book{bhambra_colonialism_social_theory_2021,
             title         = {Colonialism and \textbf{Modern Social Theory}},
             author        = {Bhambra, Gurminder K. and Holmwood, John},
@@ -282,7 +331,7 @@ mod tests {
         ";
         let bibliography = Bibliography::parse(src).unwrap();
         let mut formatting_struct = CitekeyFormatting {
-            bibfile_path: PathBuf::new(),
+            bibfile_path: (PathBuf::new(), None),
             bib_entries: bibliography,
             fields: vec![
                 "entrytype;;;;:".into(),
@@ -294,29 +343,35 @@ mod tests {
             case: None,
             old_new_keys_map: Vec::new(),
         };
-        formatting_struct.do_formatting();
-        let keys = formatting_struct.bib_entries.keys().collect_vec();
+        let _ = formatting_struct.do_formatting();
+        assert_eq!(
+            formatting_struct.old_new_keys_map.get(0).unwrap().1,
+            "article:Bos-McCurley_LaT_met_and_pub_Empt_2023"
+        );
         assert_eq!(
-            keys[0],
+            formatting_struct.old_new_keys_map.get(1).unwrap().1,
             "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021"
         );
         formatting_struct.case = Some(CitekeyCase::Lower);
-        formatting_struct.do_formatting();
-        let keys = formatting_struct.bib_entries.keys().collect_vec();
+        let _ = formatting_struct.do_formatting().rev_sort_new_keys_by_len();
+        // Now the longer citekey comes first, and it's in lowercase!
         assert_eq!(
-            keys[0],
+            formatting_struct.old_new_keys_map.get(0).unwrap().1,
             "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021"
         );
-        // let bib_string = formatting_struct.bib_entries.to_biblatex_string();
-        // let new_entry = r"
-        // @book{book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021,
-        //     title         = {Colonialism and \textbf{Modern Social Theory}},
-        //     author        = {Bhambra, Gurminder K. and Holmwood, John},
-        //     location      = {Cambridge and Medford},
-        //     publisher     = {Polity Press},
-        //     date          = {2021},
-        // }
-        // ";
-        // assert_eq!(new_entry, bib_string);
+    }
+
+    #[test]
+    fn sorting_appended_citekeys() {
+        let mut keys: Vec<(String, String)> = vec![
+            ("smith2000".into(), "smith_book_2000".into()),
+            ("smith2000a".into(), "smith_book_2000a".into()),
+            ("smith2000ab".into(), "smith_book_2000ab".into()),
+        ];
+        keys.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
+        let mut keys = keys.iter();
+        assert_eq!(keys.next().unwrap().1, "smith_book_2000ab");
+        assert_eq!(keys.next().unwrap().1, "smith_book_2000a");
+        assert_eq!(keys.next().unwrap().1, "smith_book_2000");
     }
 }
diff --git a/src/cliargs.rs b/src/cliargs.rs
index 082ecda..3b12fc3 100644
--- a/src/cliargs.rs
+++ b/src/cliargs.rs
@@ -18,20 +18,19 @@
 use color_eyre::eyre::Result;
 use dirs::{config_dir, home_dir};
 use lexopt::prelude::*;
+use owo_colors::OwoColorize;
 use owo_colors::colors::css::LightGreen;
 use owo_colors::colors::*;
-use owo_colors::OwoColorize;
 use std::env;
 use std::path::PathBuf;
 use walkdir::WalkDir;
 
 use crate::app;
+use crate::config::BibiConfig;
 
 // struct for CLIArgs
 #[derive(Debug, Default, Clone)]
 pub struct CLIArgs {
-    pub helparg: bool,
-    pub versionarg: bool,
     pub pos_args: Vec<PathBuf>,
     pub cfg_path: Option<PathBuf>,
     pub light_theme: bool,
@@ -39,7 +38,7 @@ pub struct CLIArgs {
 }
 
 impl CLIArgs {
-    pub fn parse_args() -> Result<CLIArgs, lexopt::Error> {
+    pub fn parse_args() -> color_eyre::Result<(CLIArgs, BibiConfig)> {
         let mut args = CLIArgs::default();
         let mut parser = lexopt::Parser::from_env();
 
@@ -52,22 +51,57 @@ impl CLIArgs {
             None
         };
 
+        // if parser
+        //     .raw_args()
+        //     .is_ok_and(|mut arg| arg.next_if(|a| a == "format-citekeys").is_some())
+        // {
+        //     todo!("Format citekeys options");
+        // }
+
         while let Some(arg) = parser.next()? {
             match arg {
-                Short('h') | Long("help") => args.helparg = true,
-                Short('v') | Long("version") => args.versionarg = true,
+                Short('h') | Long("help") => {
+                    println!("{}", help_func());
+                    std::process::exit(0);
+                }
+                Short('v') | Long("version") => {
+                    println!("{}", version_func());
+                    std::process::exit(0);
+                }
                 Short('c') | Long("config-file") => args.cfg_path = Some(parser.value()?.parse()?),
                 Long("light-terminal") => args.light_theme = true,
                 Long("pdf-path") => {
                     args.pdf_path = Some(parser.value()?.parse()?);
                 }
                 // Value(pos_arg) => parse_files(&mut args, pos_arg),
-                Value(pos_arg) => args.pos_args.push(pos_arg.into()),
-                _ => return Err(arg.unexpected()),
+                Value(pos_arg) => {
+                    if args.pos_args.is_empty() && pos_arg == "format-citekeys" {
+                        todo!("Write format citekeys function");
+                    } else {
+                        args.pos_args.push(parser.value()?.into());
+                    }
+                }
+                _ => return Err(arg.unexpected().into()),
             }
         }
 
-        Ok(args)
+        if args
+            .cfg_path
+            .as_ref()
+            .is_some_and(|f| !f.try_exists().unwrap() || !f.is_file())
+        {
+            BibiConfig::create_default_config(&args);
+        }
+
+        let mut cfg = if args.cfg_path.is_some() {
+            BibiConfig::parse_config(&args)?
+        } else {
+            BibiConfig::new(&args)
+        };
+
+        cfg.cli_overwrite(&args);
+
+        Ok((args, cfg))
     }
 }
 
diff --git a/src/main.rs b/src/main.rs
index c956d7c..58805d5 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -31,35 +31,7 @@ pub mod tui;
 #[tokio::main]
 async fn main() -> Result<()> {
     // Parse CLI arguments
-    let mut parsed_args = CLIArgs::parse_args()?;
-
-    // Print help if -h/--help flag is passed and exit
-    if parsed_args.helparg {
-        println!("{}", cliargs::help_func());
-        std::process::exit(0);
-    }
-
-    // Print version if -v/--version flag is passed and exit
-    if parsed_args.versionarg {
-        println!("{}", cliargs::version_func());
-        std::process::exit(0);
-    }
-
-    if parsed_args
-        .cfg_path
-        .as_ref()
-        .is_some_and(|f| !f.try_exists().unwrap() || !f.is_file())
-    {
-        BibiConfig::create_default_config(&parsed_args);
-    }
-
-    let mut cfg = if parsed_args.cfg_path.is_some() {
-        BibiConfig::parse_config(&parsed_args)?
-    } else {
-        BibiConfig::new(&parsed_args)
-    };
-
-    cfg.cli_overwrite(&parsed_args);
+    let (mut parsed_args, mut cfg) = CLIArgs::parse_args()?;
 
     init_error_hooks()?;
 
-- 
cgit v1.2.3


From 669936a8e4ff99012e8b32ae15616f8fe206ab2d Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Thu, 9 Oct 2025 16:48:54 +0200
Subject: subcommand test for pure cli operations

---
 src/cliargs.rs | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/cliargs.rs b/src/cliargs.rs
index 3b12fc3..26a07af 100644
--- a/src/cliargs.rs
+++ b/src/cliargs.rs
@@ -15,7 +15,6 @@
 // along with this program.  If not, see <https://www.gnu.org/licenses/>.
 /////
 
-use color_eyre::eyre::Result;
 use dirs::{config_dir, home_dir};
 use lexopt::prelude::*;
 use owo_colors::OwoColorize;
@@ -41,6 +40,7 @@ impl CLIArgs {
     pub fn parse_args() -> color_eyre::Result<(CLIArgs, BibiConfig)> {
         let mut args = CLIArgs::default();
         let mut parser = lexopt::Parser::from_env();
+        let mut subcommand = None;
 
         // Default config
         args.cfg_path = if config_dir().is_some() {
@@ -73,12 +73,22 @@ impl CLIArgs {
                 Long("pdf-path") => {
                     args.pdf_path = Some(parser.value()?.parse()?);
                 }
-                // Value(pos_arg) => parse_files(&mut args, pos_arg),
                 Value(pos_arg) => {
-                    if args.pos_args.is_empty() && pos_arg == "format-citekeys" {
-                        todo!("Write format citekeys function");
+                    if args.pos_args.is_empty() {
+                        let value = pos_arg
+                            .into_string()
+                            .unwrap_or_else(|os| os.to_string_lossy().to_string());
+                        match value.as_str() {
+                            "format-citekeys" => {
+                                subcommand = Some(value);
+                                break;
+                            }
+                            _ => {
+                                args.pos_args.push(value.into());
+                            }
+                        }
                     } else {
-                        args.pos_args.push(parser.value()?.into());
+                        args.pos_args.push(pos_arg.into());
                     }
                 }
                 _ => return Err(arg.unexpected().into()),
@@ -88,7 +98,7 @@ impl CLIArgs {
         if args
             .cfg_path
             .as_ref()
-            .is_some_and(|f| !f.try_exists().unwrap() || !f.is_file())
+            .is_some_and(|f| f.try_exists().is_err() || !f.is_file())
         {
             BibiConfig::create_default_config(&args);
         }
@@ -99,6 +109,13 @@ impl CLIArgs {
             BibiConfig::new(&args)
         };
 
+        if let Some(cmd) = subcommand {
+            match cmd.as_str() {
+                "format-citekeys" => todo!("write citekey formatting"),
+                _ => {}
+            }
+        }
+
         cfg.cli_overwrite(&args);
 
         Ok((args, cfg))
-- 
cgit v1.2.3


From f9548af5c7693edf536b4ad45564a964338e2c2e Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Fri, 10 Oct 2025 10:31:45 +0200
Subject: set up citekey formatting cli, reformat help output

---
 Cargo.lock              |   1 +
 Cargo.toml              |   1 +
 src/bibiman/citekeys.rs |  87 ++++++++++++++++++++++++++++++++++++++-
 src/cliargs.rs          | 106 ++++++++++++++++++++++++++++++++----------------
 src/main.rs             |  14 ++++---
 5 files changed, 168 insertions(+), 41 deletions(-)

(limited to 'src')

diff --git a/Cargo.lock b/Cargo.lock
index 49f65b4..a27636e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -107,6 +107,7 @@ dependencies = [
  "editor-command",
  "figment",
  "futures",
+ "indoc",
  "itertools",
  "lexopt",
  "logos",
diff --git a/Cargo.toml b/Cargo.toml
index 098848e..abf1eee 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -40,6 +40,7 @@ figment = { version = "0.10.19", features = [ "toml", "test" ]}
 owo-colors = "4.2.2"
 logos = "0.15.1"
 phf = { version = "0.13.1", features = ["macros"] }
+indoc = "2.0.6"
 
 [workspace.metadata.cross.target.aarch64-unknown-linux-gnu]
 # Install libssl-dev:arm64, see <https://github.com/cross-rs/cross/blob/main/docs/custom_images.md#adding-dependencies-to-existing-images>
diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index b7995ac..cafd124 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -23,7 +23,12 @@ use std::{
 
 use biblatex::{Bibliography, ChunksExt, Entry, Type};
 use color_eyre::eyre::eyre;
-use owo_colors::OwoColorize;
+use indoc::formatdoc;
+use lexopt::Arg::{Long, Short};
+use owo_colors::{
+    OwoColorize,
+    colors::{BrightBlue, Green, White},
+};
 use serde::{Deserialize, Serialize};
 
 use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig};
@@ -48,6 +53,30 @@ pub(crate) struct CitekeyFormatting {
 }
 
 impl CitekeyFormatting {
+    pub(crate) fn parse_citekey_cli(
+        parser: &mut lexopt::Parser,
+        cfg: &BibiConfig,
+    ) -> color_eyre::Result<()> {
+        let mut formatter = CitekeyFormatting::default();
+
+        while let Some(arg) = parser.next()? {
+            match arg {
+                Short('h') | Long("help") => {
+                    formatting_help();
+                    return Ok(());
+                }
+                Short('s') | Short('f') | Long("source") | Long("file") => {
+                    formatter.bibfile_path.0 = parser.value()?.into()
+                }
+                Short('t') | Short('o') | Long("target") | Long("output") => {
+                    formatter.bibfile_path.1 = Some(parser.value()?.into())
+                }
+                _ => return Err(arg.unexpected().into()),
+            }
+        }
+
+        Ok(())
+    }
     /// Start Citekey formatting with building a new instance of `CitekeyFormatting`
     /// Formatting is processed file by file, because `bibman` can handle
     /// multi-file setups.
@@ -136,6 +165,62 @@ impl CitekeyFormatting {
     }
 }
 
+fn formatting_help() {
+    let help = vec![
+        formatdoc!(
+            "{} {}\n",
+            env!("CARGO_PKG_NAME").fg::<Green>().bold(),
+            env!("CARGO_PKG_VERSION")
+        ),
+        formatdoc!("{}", "USAGE".bold()),
+        formatdoc!(
+            "\t{} {} {} {}\n",
+            env!("CARGO_PKG_NAME").fg::<White>().bold(),
+            "format-citekeys".bold(),
+            "--source=<SOURCE>".bold(),
+            "--output=<TARGET>".bold()
+        ),
+        formatdoc!(
+            "
+                \tThis help describes the CLI usage for the citekey formatting
+                \tfunctionality of bibiman. The definition of patterns how the
+                \tcitekeys should be formatted must be set in the config file.
+                \tFor further informations how to use this patterns etc. see:
+                \t{}
+            ",
+            "https://codeberg.org/lukeflo/bibiman/src/branch/main#bibiman"
+                .italic()
+                .fg::<BrightBlue>()
+        ),
+        formatdoc!("{}", "OPTIONS".bold()),
+        formatdoc!(
+            "
+                \t{}
+                \tShow this help and exit
+            ",
+            "-h, --help".fg::<White>().bold()
+        ),
+        formatdoc! {"
+                \t{}
+                \tThe bibfile for which the citekey formatting should be processed.
+                \tTakes a path as argument.
+            ", "-s, -f, --source=, --file=".fg::<White>().bold()},
+        formatdoc!(
+            "
+                \t{}
+                \tThe bibfile to which the updated content should be written.
+                \tTakes a path as argument. If the file doesn't exist, it will be
+                \tcreated.
+                \tIf the argument isn't used, the original file will be {}!
+            ",
+            "-t, -o, --target=, --output=".fg::<White>().bold(),
+            "overwritten".italic(),
+        ),
+    ];
+    let help = help.join("\n");
+    println!("{}", help);
+}
+
 /// Build the citekey from the patterns defined in the config file
 fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String {
     let mut new_citekey = String::new();
diff --git a/src/cliargs.rs b/src/cliargs.rs
index 26a07af..e766e77 100644
--- a/src/cliargs.rs
+++ b/src/cliargs.rs
@@ -16,6 +16,7 @@
 /////
 
 use dirs::{config_dir, home_dir};
+use indoc::formatdoc;
 use lexopt::prelude::*;
 use owo_colors::OwoColorize;
 use owo_colors::colors::css::LightGreen;
@@ -25,6 +26,7 @@ use std::path::PathBuf;
 use walkdir::WalkDir;
 
 use crate::app;
+use crate::bibiman::citekeys::CitekeyFormatting;
 use crate::config::BibiConfig;
 
 // struct for CLIArgs
@@ -37,10 +39,16 @@ pub struct CLIArgs {
 }
 
 impl CLIArgs {
-    pub fn parse_args() -> color_eyre::Result<(CLIArgs, BibiConfig)> {
+    /// This function parses the command line, then initializes and returns the
+    /// necessary structs `CLIArgs` and `BibiConfig`.
+    ///
+    /// Additionally, it returns a bool which defines whether the TUI should be run.
+    /// It is `false` for pure CLI processes such as `format-citekeys`.
+    pub fn parse_args() -> color_eyre::Result<(CLIArgs, BibiConfig, bool)> {
         let mut args = CLIArgs::default();
         let mut parser = lexopt::Parser::from_env();
         let mut subcommand = None;
+        let mut run_tui = true;
 
         // Default config
         args.cfg_path = if config_dir().is_some() {
@@ -81,6 +89,7 @@ impl CLIArgs {
                         match value.as_str() {
                             "format-citekeys" => {
                                 subcommand = Some(value);
+                                run_tui = false;
                                 break;
                             }
                             _ => {
@@ -111,14 +120,16 @@ impl CLIArgs {
 
         if let Some(cmd) = subcommand {
             match cmd.as_str() {
-                "format-citekeys" => todo!("write citekey formatting"),
+                "format-citekeys" => {
+                    CitekeyFormatting::parse_citekey_cli(&mut parser, &cfg)?;
+                }
                 _ => {}
             }
         }
 
         cfg.cli_overwrite(&args);
 
-        Ok((args, cfg))
+        Ok((args, cfg, run_tui))
     }
 }
 
@@ -172,14 +183,21 @@ pub fn help_func() -> String {
             env!("CARGO_PKG_VERSION").fg::<LightGreen>(),
         ),
         format!(
-            "{}:\n\t{} [Flags] [files/dirs]\n",
+            "{}\n\t{} [OPTIONS] [SUBCOMMAND | POSITIONAL ARGUMENTS]\n",
             "USAGE".bold(),
-            "bibiman".bold()
+            env!("CARGO_PKG_NAME").fg::<White>().bold()
+        ),
+        formatdoc!(
+            "
+                \tYou can either use a {} or {}, not both!
+            ",
+            "subcommand".bold(),
+            "positional arguments".bold()
         ),
         format!(
-            "{}:\n\t{}\t\tPath to {} file",
+            "{}\n\t{}\t\tPath to {} file",
             "POSITIONAL ARGUMENTS".bold(),
-            "<file>".fg::<BrightMagenta>().bold(),
+            "<file>".fg::<Magenta>().bold(),
             ".bib".fg::<BrightBlack>().bold()
         ),
         format!(
@@ -188,38 +206,58 @@ pub fn help_func() -> String {
             ".bib".fg::<BrightBlack>().bold()
         ),
         format!("\n\t{}", "Both can be passed multiple times".italic()),
-        format!("\n{}:", "FLAGS".bold()),
-        format!("\t{}", "-h, --help".bold().fg::<BrightCyan>()),
-        format!("\t\t{}", "Show this help and exit"),
-        format!("\t{}", "-v, --version".bold().fg::<BrightCyan>()),
-        format!("\t\t{}", "Show the version and exit"),
-        format!("\t{}", "--light-terminal".bold().fg::<BrightCyan>()),
-        format!(
-            "\t\t{}",
-            "Enable default colors for light terminal background"
+        format!("\n{}", "SUBCOMMANDS".bold()),
+        formatdoc!(
+            "
+                \t{}
+                \tRun the citekey formatting procedure on a specified bibfile.
+                \tFor further infos run {}
+            ",
+            "format-citekeys".fg::<BrightYellow>().bold(),
+            "bibiman format-citekeys --help".fg::<BrightBlack>().bold()
         ),
-        format!(
-            "\t{}{}",
-            "-c, --config-file=".bold().fg::<BrightCyan>(),
-            "<value>".bold().italic().fg::<BrightCyan>()
+        format!("{}", "OPTIONS".bold()),
+        formatdoc!(
+            "
+            \t{}
+            \tShow this help and exit
+            ",
+            "-h, --help".bold().fg::<White>()
         ),
-        format!("\t\t{}", "Path to config file used for current session."),
-        format!("\t\t{}", "Takes precedence over standard config file."),
-        format!(
-            "\t{}{}",
-            "--pdf-path=".bold().fg::<BrightCyan>(),
-            "<value>".bold().italic().fg::<BrightCyan>()
+        formatdoc!(
+            "
+            \t{}
+            \tShow the version and exit
+            ",
+            "-v, --version".bold().fg::<White>()
         ),
-        format!("\t\t{}", "Path to directory containing PDF files."),
-        format!(
-            "\t\t{}",
-            "If the pdf files basename matches an entrys citekey,"
+        formatdoc!(
+            "
+            \t{}
+            \tEnable default colors for light terminal background
+            ",
+            "--light-terminal".bold().fg::<White>()
         ),
-        format!(
-            "\t\t{}",
-            "its attached as connected PDF file for the current session."
+        formatdoc!(
+            "
+            \t{}{}
+            \tPath to config file used for current session.
+            \tTakes precedence over standard config file.
+            ",
+            "-c, --config-file=".bold().fg::<White>(),
+            "<value>".bold().italic().fg::<White>()
+        ),
+        formatdoc!(
+            "
+            \t{}{}
+            \tPath to directory containing PDF files.
+            \tIf the pdf files basename matches an entrys citekey,
+            \tits attached as connected PDF file for the current session.
+            \tDoes not edit the bibfile itself!
+            ",
+            "--pdf-path=".bold().fg::<White>(),
+            "<value>".bold().italic().fg::<White>()
         ),
-        format!("\t\t{}", "Does not edit the bibfile itself!"),
     ];
     let help = help.join("\n");
     help
diff --git a/src/main.rs b/src/main.rs
index 58805d5..e735eb0 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -18,7 +18,6 @@
 use app::App;
 use cliargs::CLIArgs;
 use color_eyre::eyre::Result;
-use config::BibiConfig;
 use errorsetup::init_error_hooks;
 
 pub mod app;
@@ -31,13 +30,16 @@ pub mod tui;
 #[tokio::main]
 async fn main() -> Result<()> {
     // Parse CLI arguments
-    let (mut parsed_args, mut cfg) = CLIArgs::parse_args()?;
+    let (mut parsed_args, mut cfg, run_tui) = CLIArgs::parse_args()?;
 
-    init_error_hooks()?;
+    if run_tui {
+        init_error_hooks()?;
 
-    // Create an application.
-    let mut app = App::new(&mut parsed_args, &mut cfg)?;
+        // Create an application.
+        let mut app = App::new(&mut parsed_args, &mut cfg)?;
+
+        app.run(&cfg).await?;
+    }
 
-    app.run(&cfg).await?;
     Ok(())
 }
-- 
cgit v1.2.3


From 4779dbc5fe3712bce31bbb5f1f43c28c4c839420 Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Fri, 10 Oct 2025 13:47:07 +0200
Subject: substitute byte index for char counting loop, impl `dry-run` option
 for citekey formatting

---
 src/bibiman/citekeys.rs | 118 +++++++++++++++++++++++++++++++++++-------------
 tests/test-config.toml  |   4 ++
 2 files changed, 90 insertions(+), 32 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index cafd124..5121741 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -22,7 +22,7 @@ use std::{
 };
 
 use biblatex::{Bibliography, ChunksExt, Entry, Type};
-use color_eyre::eyre::eyre;
+use color_eyre::eyre::{OptionExt, eyre};
 use indoc::formatdoc;
 use lexopt::Arg::{Long, Short};
 use owo_colors::{
@@ -50,6 +50,7 @@ pub(crate) struct CitekeyFormatting {
     fields: Vec<String>,
     case: Option<CitekeyCase>,
     old_new_keys_map: Vec<(String, String)>,
+    dry_run: bool,
 }
 
 impl CitekeyFormatting {
@@ -59,12 +60,27 @@ impl CitekeyFormatting {
     ) -> color_eyre::Result<()> {
         let mut formatter = CitekeyFormatting::default();
 
+        formatter.fields = cfg
+            .citekey_formatter
+            .fields
+            .clone()
+            .ok_or_eyre("Need to define fields correctly in config file")?;
+
+        formatter.case = cfg.citekey_formatter.case.clone();
+
+        if formatter.fields.is_empty() {
+            return Err(eyre!(
+                "To format all citekeys, you need to provide {} values in the config file",
+                "fields".bold()
+            ));
+        }
         while let Some(arg) = parser.next()? {
             match arg {
                 Short('h') | Long("help") => {
                     formatting_help();
                     return Ok(());
                 }
+                Short('d') | Long("dry-run") => formatter.dry_run = true,
                 Short('s') | Short('f') | Long("source") | Long("file") => {
                     formatter.bibfile_path.0 = parser.value()?.into()
                 }
@@ -75,6 +91,16 @@ impl CitekeyFormatting {
             }
         }
 
+        let bibstring = std::fs::read_to_string(&formatter.bibfile_path.0)?;
+
+        formatter.bib_entries = Bibliography::parse(&bibstring)
+            .map_err(|e| eyre!("Couldn't parse bibfile due to {}", e.kind))?;
+
+        formatter
+            .do_formatting()
+            .rev_sort_new_keys_by_len()
+            .update_file()?;
+
         Ok(())
     }
     /// Start Citekey formatting with building a new instance of `CitekeyFormatting`
@@ -108,6 +134,7 @@ impl CitekeyFormatting {
             fields,
             case: cfg.citekey_formatter.case.clone(),
             old_new_keys_map: Vec::new(),
+            dry_run: false,
         })
     }
 
@@ -122,32 +149,40 @@ impl CitekeyFormatting {
         }
 
         self.old_new_keys_map = old_new_keys;
+        // dbg!(&self.old_new_keys_map);
 
         self
     }
 
     /// Write entries with updated citekeys to bibfile
-    pub fn update_file(&self) -> color_eyre::Result<()> {
-        let source_file = self.bibfile_path.0.as_path();
-        let target_file = if let Some(path) = &self.bibfile_path.1 {
-            path
+    pub fn update_file(&mut self) -> color_eyre::Result<()> {
+        if self.dry_run {
+            println!("Following citekeys would be formatted: old => new\n");
+            self.old_new_keys_map.sort_by(|a, b| a.0.cmp(&b.0));
+            for (old, new) in &self.old_new_keys_map {
+                println!("{} => {}", old.italic(), new.bold())
+            }
         } else {
-            source_file
-        };
-        let mut content = std::fs::read_to_string(source_file)?;
-
-        for (old_key, new_key) in self.old_new_keys_map.iter() {
-            content = content.replace(old_key, new_key);
-        }
+            let source_file = self.bibfile_path.0.as_path();
+            let target_file = if let Some(path) = &self.bibfile_path.1 {
+                path
+            } else {
+                source_file
+            };
+            let mut content = std::fs::read_to_string(source_file)?;
 
-        let mut new_file = OpenOptions::new()
-            .truncate(true)
-            .write(true)
-            .create(true)
-            .open(target_file)?;
+            for (old_key, new_key) in self.old_new_keys_map.iter() {
+                content = content.replace(old_key, new_key);
+            }
 
-        new_file.write_all(content.as_bytes())?;
+            let mut new_file = OpenOptions::new()
+                .truncate(true)
+                .write(true)
+                .create(true)
+                .open(target_file)?;
 
+            new_file.write_all(content.as_bytes())?;
+        }
         Ok(())
     }
 
@@ -160,7 +195,7 @@ impl CitekeyFormatting {
     /// anymore.
     pub fn rev_sort_new_keys_by_len(&mut self) -> &mut Self {
         self.old_new_keys_map
-            .sort_by(|a, b| b.1.len().cmp(&a.1.len()));
+            .sort_by(|a, b| b.0.len().cmp(&a.0.len()));
         self
     }
 }
@@ -232,14 +267,20 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey
             let field = preformat_field(field, entry);
             let mut split_field = field.split_whitespace();
             let mut words_passed = 0;
-            let word_count = if let Some(val) = word_count {
+            let field_count = field.split_whitespace().count();
+            let word_count = if let Some(val) = word_count
+                && val <= field_count
+            {
                 val
             } else {
-                field.split_whitespace().count()
+                field_count
             };
             loop {
+                if field.is_empty() {
+                    break;
+                }
                 if let Some(field_slice) = split_field.next() {
-                    formatted_str = formatted_str + format_word(field_slice, char_count);
+                    formatted_str = formatted_str + &format_word(field_slice, char_count);
                     words_passed += 1;
                     if word_count == words_passed {
                         formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
@@ -270,7 +311,7 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey
 fn preformat_field(field: &str, entry: &Entry) -> String {
     match field {
         "title" => {
-            sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("NA".into()))
+            sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("".into()))
         }
         "author" => {
             if let Ok(authors) = entry.author() {
@@ -278,21 +319,20 @@ fn preformat_field(field: &str, entry: &Entry) -> String {
                 for a in authors.iter() {
                     last_names = last_names + &a.name + " ";
                 }
-                dbg!(&last_names);
                 last_names
             } else {
-                "NA".to_string()
+                "".to_string()
             }
         }
         "year" => {
             if let Ok(date) = entry.date() {
                 date.to_chunks().format_verbatim()[..4].to_string()
             } else {
-                entry.get_as::<String>(field).unwrap_or("NA".into())
+                entry.get_as::<String>(field).unwrap_or("".into())
             }
         }
         "subtitle" => {
-            sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("NA".into()))
+            sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("".into()))
         }
         "editor" => {
             if let Ok(editors) = entry.editors() {
@@ -304,22 +344,35 @@ fn preformat_field(field: &str, entry: &Entry) -> String {
                 }
                 last_names
             } else {
-                "NA".to_string()
+                "".to_string()
             }
         }
         "pubtype" | "entrytype" => entry.entry_type.to_string(),
-        _ => entry.get_as::<String>(field).unwrap_or("Empty".into()),
+        _ => entry.get_as::<String>(field).unwrap_or("".into()),
     }
 }
 
 /// Cut of word at char count index if its set
-fn format_word(word: &str, count: Option<usize>) -> &str {
+fn format_word(word: &str, count: Option<usize>) -> String {
     if let Some(len) = count
         && len < word.chars().count()
     {
-        &word[..len]
+        // Since chars can consist of multiple bytes, we need this more complex
+        // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...)
+        // instead of simple byte indexing
+        let mut word_slice = String::new();
+        let word_chars = word.chars();
+        let mut counter = 0;
+        for c in word_chars {
+            if counter == len {
+                break;
+            }
+            word_slice.push(c);
+            counter += 1;
+        }
+        word_slice
     } else {
-        word
+        word.to_string()
     }
 }
 
@@ -427,6 +480,7 @@ mod tests {
             ],
             case: None,
             old_new_keys_map: Vec::new(),
+            dry_run: false,
         };
         let _ = formatting_struct.do_formatting();
         assert_eq!(
diff --git a/tests/test-config.toml b/tests/test-config.toml
index fc447f1..b484b69 100644
--- a/tests/test-config.toml
+++ b/tests/test-config.toml
@@ -59,3 +59,7 @@ custom_column = "series"
 # author_color = "38"
 # title_color = "37"
 # year_color = "135"
+
+[citekey_formatter]
+fields = [ "author;2;;-;_", "title;3;3;_;_", "year" ]
+case = "lowercase"
-- 
cgit v1.2.3


From c69b1789fabaf149916d160922d7026f2cbe33f1 Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Fri, 10 Oct 2025 14:57:53 +0200
Subject: implement const of ignored special chars for citekey formatting

* the list contains 33 special chars at the moment
* it will only affect already existing special chars in biblatex fields
* delimiters specified for citekey formatting are not affected
* char count is also not affected, ignored chars are not counted
---
 src/bibiman/citekeys.rs | 40 +++++++++++++++++++++-------------------
 src/config.rs           |  5 +++++
 tests/test-config.toml  |  2 +-
 3 files changed, 27 insertions(+), 20 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index 5121741..7c06886 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -31,7 +31,10 @@ use owo_colors::{
 };
 use serde::{Deserialize, Serialize};
 
-use crate::{bibiman::sanitize::sanitize_single_string_fully, config::BibiConfig};
+use crate::{
+    bibiman::sanitize::sanitize_single_string_fully,
+    config::{BibiConfig, IGNORED_SPECIAL_CHARS},
+};
 
 #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub enum CitekeyCase {
@@ -354,26 +357,25 @@ fn preformat_field(field: &str, entry: &Entry) -> String {
 
 /// Cut of word at char count index if its set
 fn format_word(word: &str, count: Option<usize>) -> String {
-    if let Some(len) = count
-        && len < word.chars().count()
-    {
-        // Since chars can consist of multiple bytes, we need this more complex
-        // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...)
-        // instead of simple byte indexing
-        let mut word_slice = String::new();
-        let word_chars = word.chars();
-        let mut counter = 0;
-        for c in word_chars {
-            if counter == len {
-                break;
-            }
-            word_slice.push(c);
-            counter += 1;
+    // Since chars can consist of multiple bytes, we need this more complex
+    // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...)
+    // instead of simple byte indexing
+    let mut word_slice = String::new();
+    let word_chars = word.chars();
+    let mut counter = 0;
+    for c in word_chars {
+        if let Some(len) = count
+            && counter == len
+        {
+            break;
         }
-        word_slice
-    } else {
-        word.to_string()
+        if IGNORED_SPECIAL_CHARS.contains(&c) {
+            continue;
+        }
+        word_slice.push(c);
+        counter += 1;
     }
+    word_slice
 }
 
 /// Split a formatting pattern of kind
diff --git a/src/config.rs b/src/config.rs
index a5df61c..a4e89be 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -35,6 +35,11 @@ use crate::{
     cliargs::CLIArgs,
 };
 
+pub const IGNORED_SPECIAL_CHARS: [char; 33] = [
+    '?', '!', '\\', '\'', '.', '-', '–', ':', ',', '[', ']', '(', ')', '{', '}', '§', '$', '%',
+    '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"',
+];
+
 const DEFAULT_CONFIG: &str = r##"
 # [general]
 ## Default files/dirs which are loaded on startup
diff --git a/tests/test-config.toml b/tests/test-config.toml
index b484b69..558d216 100644
--- a/tests/test-config.toml
+++ b/tests/test-config.toml
@@ -61,5 +61,5 @@ custom_column = "series"
 # year_color = "135"
 
 [citekey_formatter]
-fields = [ "author;2;;-;_", "title;3;3;_;_", "year" ]
+fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ]
 case = "lowercase"
-- 
cgit v1.2.3


From 418d2f3874c8e86c4b58143115ee3d4181130f9c Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Fri, 10 Oct 2025 15:09:48 +0200
Subject: add dry-run information to --help function

---
 src/bibiman/citekeys.rs | 11 +++++++++++
 src/config.rs           | 15 +++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'src')

diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index 7c06886..f7704fb 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -238,6 +238,17 @@ fn formatting_help() {
             ",
             "-h, --help".fg::<White>().bold()
         ),
+        formatdoc!(
+            "
+                \t{}
+                \tDon't apply any changes to the named files. Instead print all
+                \told citekeys and the formatted strings that would have been
+                \tapplied in the format: {} => {}
+            ",
+            "-d, --dry-run".fg::<White>().bold(),
+            "old_key".italic(),
+            "new_key".bold()
+        ),
         formatdoc! {"
                 \t{}
                 \tThe bibfile for which the citekey formatting should be processed.
diff --git a/src/config.rs b/src/config.rs
index a4e89be..b1c4b07 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -103,6 +103,21 @@ const DEFAULT_CONFIG: &str = r##"
 # author_color = "38"
 # title_color = "37"
 # year_color = "135"
+
+# [citekey_formatter]
+## Define the patterns for creating citekeys. Every item of the array consists of
+## five components separated by semicolons. Despite the field name every component
+## can be left blank:
+## - name of the biblatex field ("author", "title"...)
+## - number of max words from the given field
+## - number of chars used from each word
+## - delimiter to separate words of the same field
+## - trailing delimiter separating the current field from the following
+# fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ]
+
+## Convert chars to specified case. Possible values:
+## "upper", "uppercase", "lower", "lowercase"
+# case = "lowercase"
 "##;
 
 /// Main struct of the config file. Contains substructs/headings in toml
-- 
cgit v1.2.3


From 211d556d6dc7132442c90b3ae6ac7485cf30ac5e Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Fri, 10 Oct 2025 18:22:26 +0200
Subject: trimming citekey if last field is empty: WIP

---
 src/bibiman/citekeys.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index f7704fb..1f16b48 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -273,7 +273,8 @@ fn formatting_help() {
 /// Build the citekey from the patterns defined in the config file
 fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String {
     let mut new_citekey = String::new();
-    for pattern in pattern_fields.iter() {
+    let fields = pattern_fields.len();
+    for (idx, pattern) in pattern_fields.iter().enumerate() {
         let (field, word_count, char_count, inner_delimiter, trailing_delimiter) =
             split_formatting_pat(pattern);
         let formatted_field_str = {
-- 
cgit v1.2.3


From 49d9a57bd15565116a51380d3552201b4a2de57b Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Sun, 12 Oct 2025 14:05:47 +0200
Subject: pop trailing delimiter if last field is empty

---
 src/bibiman/citekeys.rs | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index 1f16b48..065d57f 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -272,14 +272,27 @@ fn formatting_help() {
 
 /// Build the citekey from the patterns defined in the config file
 fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String {
+    // mut string the citekey is built from
     let mut new_citekey = String::new();
+
+    // count different fields of pattern vec
     let fields = pattern_fields.len();
+
+    // loop over pattern fields process them
     for (idx, pattern) in pattern_fields.iter().enumerate() {
-        let (field, word_count, char_count, inner_delimiter, trailing_delimiter) =
+        // parse single values from pattern field
+        let (field_name, word_count, char_count, inner_delimiter, trailing_delimiter) =
             split_formatting_pat(pattern);
+
+        // built the part of the citekey from the current pattern field
         let formatted_field_str = {
             let mut formatted_str = String::new();
-            let field = preformat_field(field, entry);
+
+            // preformat the field depending on biblatex value
+            let field = preformat_field(field_name, entry);
+
+            // split at whitespaces, count fields and set counter for processed
+            // splits
             let mut split_field = field.split_whitespace();
             let mut words_passed = 0;
             let field_count = field.split_whitespace().count();
@@ -290,10 +303,19 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey
             } else {
                 field_count
             };
+
+            // loop over single parts of current field and add correct delimiter
             loop {
+                // terminate loop for current field if its empty. If its also the
+                // last of the pattern vec, pop the trailing delimiter
                 if field.is_empty() {
+                    if idx + 1 == fields {
+                        let _ = new_citekey.pop();
+                    }
                     break;
                 }
+
+                // process the single slices and add correct delimiter
                 if let Some(field_slice) = split_field.next() {
                     formatted_str = formatted_str + &format_word(field_slice, char_count);
                     words_passed += 1;
-- 
cgit v1.2.3


From f112c4e13009e5ddfe3cf5c4cbe7f29f832b8553 Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Sun, 12 Oct 2025 21:51:21 +0200
Subject: solve double delimiters with empty fields

---
 src/bibiman/citekeys.rs | 36 ++++++++++++++++++++++++++++++------
 tests/test-config.toml  |  2 +-
 2 files changed, 31 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index 065d57f..9d17403 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -317,13 +317,37 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey
 
                 // process the single slices and add correct delimiter
                 if let Some(field_slice) = split_field.next() {
-                    formatted_str = formatted_str + &format_word(field_slice, char_count);
-                    words_passed += 1;
-                    if word_count == words_passed {
-                        formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
-                        break;
+                    // Create word slice char by char. We need to loop over chars
+                    // instead of a simple bytes index to also catch chars which
+                    // consist of more than one byte (äöüøæ etc...)
+                    let mut word_slice = String::new();
+                    let word_chars = field_slice.chars();
+                    let mut counter = 0;
+                    for c in word_chars {
+                        if let Some(len) = char_count
+                            && counter == len
+                        {
+                            break;
+                        }
+                        // if a word slice contains a special char, skip it
+                        if IGNORED_SPECIAL_CHARS.contains(&c) {
+                            continue;
+                        }
+                        word_slice.push(c);
+                        counter += 1;
+                    }
+                    // Don't count empty slices and don't add delimiter to those
+                    if !word_slice.is_empty() {
+                        formatted_str = formatted_str + &word_slice;
+                        words_passed += 1;
+                        if word_count == words_passed {
+                            formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
+                            break;
+                        } else {
+                            formatted_str = formatted_str + inner_delimiter.unwrap_or("");
+                        }
                     } else {
-                        formatted_str = formatted_str + inner_delimiter.unwrap_or("")
+                        continue;
                     }
                 } else {
                     formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
diff --git a/tests/test-config.toml b/tests/test-config.toml
index 558d216..2c5ac96 100644
--- a/tests/test-config.toml
+++ b/tests/test-config.toml
@@ -61,5 +61,5 @@ custom_column = "series"
 # year_color = "135"
 
 [citekey_formatter]
-fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ]
+fields = [ "author;2;;-;_", "title;3;6;_;_", "year", "publisher;;5;#;" ]
 case = "lowercase"
-- 
cgit v1.2.3


From 0a8805acfb6fbb3d3a8c22f4ccbaf692a73cddfb Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Sun, 12 Oct 2025 23:01:17 +0200
Subject: ignore list for words, but need to solve inner delimiter problem for
 words ignored

---
 src/bibiman/citekeys.rs               | 317 ++------------------------------
 src/bibiman/citekeys/citekey_utils.rs | 327 ++++++++++++++++++++++++++++++++++
 tests/test-config.toml                |   7 +-
 3 files changed, 348 insertions(+), 303 deletions(-)
 create mode 100644 src/bibiman/citekeys/citekey_utils.rs

(limited to 'src')

diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index 9d17403..2f56947 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -21,27 +21,33 @@ use std::{
     path::{Path, PathBuf},
 };
 
-use biblatex::{Bibliography, ChunksExt, Entry, Type};
+use biblatex::Bibliography;
 use color_eyre::eyre::{OptionExt, eyre};
-use indoc::formatdoc;
 use lexopt::Arg::{Long, Short};
-use owo_colors::{
-    OwoColorize,
-    colors::{BrightBlue, Green, White},
-};
+use owo_colors::OwoColorize;
 use serde::{Deserialize, Serialize};
 
 use crate::{
-    bibiman::sanitize::sanitize_single_string_fully,
-    config::{BibiConfig, IGNORED_SPECIAL_CHARS},
+    bibiman::citekeys::citekey_utils::{build_citekey, formatting_help},
+    config::BibiConfig,
 };
 
+mod citekey_utils;
+
 #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub enum CitekeyCase {
     #[serde(alias = "uppercase", alias = "upper")]
     Upper,
     #[serde(alias = "lowercase", alias = "lower")]
     Lower,
+    #[serde(
+        alias = "camel",
+        alias = "camelcase",
+        alias = "camel_case",
+        alias = "uppercamelcase",
+        alias = "upper_camel_case"
+    )]
+    Camel,
 }
 
 #[derive(Debug, Default, Clone)]
@@ -203,306 +209,13 @@ impl CitekeyFormatting {
     }
 }
 
-fn formatting_help() {
-    let help = vec![
-        formatdoc!(
-            "{} {}\n",
-            env!("CARGO_PKG_NAME").fg::<Green>().bold(),
-            env!("CARGO_PKG_VERSION")
-        ),
-        formatdoc!("{}", "USAGE".bold()),
-        formatdoc!(
-            "\t{} {} {} {}\n",
-            env!("CARGO_PKG_NAME").fg::<White>().bold(),
-            "format-citekeys".bold(),
-            "--source=<SOURCE>".bold(),
-            "--output=<TARGET>".bold()
-        ),
-        formatdoc!(
-            "
-                \tThis help describes the CLI usage for the citekey formatting
-                \tfunctionality of bibiman. The definition of patterns how the
-                \tcitekeys should be formatted must be set in the config file.
-                \tFor further informations how to use this patterns etc. see:
-                \t{}
-            ",
-            "https://codeberg.org/lukeflo/bibiman/src/branch/main#bibiman"
-                .italic()
-                .fg::<BrightBlue>()
-        ),
-        formatdoc!("{}", "OPTIONS".bold()),
-        formatdoc!(
-            "
-                \t{}
-                \tShow this help and exit
-            ",
-            "-h, --help".fg::<White>().bold()
-        ),
-        formatdoc!(
-            "
-                \t{}
-                \tDon't apply any changes to the named files. Instead print all
-                \told citekeys and the formatted strings that would have been
-                \tapplied in the format: {} => {}
-            ",
-            "-d, --dry-run".fg::<White>().bold(),
-            "old_key".italic(),
-            "new_key".bold()
-        ),
-        formatdoc! {"
-                \t{}
-                \tThe bibfile for which the citekey formatting should be processed.
-                \tTakes a path as argument.
-            ", "-s, -f, --source=, --file=".fg::<White>().bold()},
-        formatdoc!(
-            "
-                \t{}
-                \tThe bibfile to which the updated content should be written.
-                \tTakes a path as argument. If the file doesn't exist, it will be
-                \tcreated.
-                \tIf the argument isn't used, the original file will be {}!
-            ",
-            "-t, -o, --target=, --output=".fg::<White>().bold(),
-            "overwritten".italic(),
-        ),
-    ];
-    let help = help.join("\n");
-    println!("{}", help);
-}
-
-/// Build the citekey from the patterns defined in the config file
-fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String {
-    // mut string the citekey is built from
-    let mut new_citekey = String::new();
-
-    // count different fields of pattern vec
-    let fields = pattern_fields.len();
-
-    // loop over pattern fields process them
-    for (idx, pattern) in pattern_fields.iter().enumerate() {
-        // parse single values from pattern field
-        let (field_name, word_count, char_count, inner_delimiter, trailing_delimiter) =
-            split_formatting_pat(pattern);
-
-        // built the part of the citekey from the current pattern field
-        let formatted_field_str = {
-            let mut formatted_str = String::new();
-
-            // preformat the field depending on biblatex value
-            let field = preformat_field(field_name, entry);
-
-            // split at whitespaces, count fields and set counter for processed
-            // splits
-            let mut split_field = field.split_whitespace();
-            let mut words_passed = 0;
-            let field_count = field.split_whitespace().count();
-            let word_count = if let Some(val) = word_count
-                && val <= field_count
-            {
-                val
-            } else {
-                field_count
-            };
-
-            // loop over single parts of current field and add correct delimiter
-            loop {
-                // terminate loop for current field if its empty. If its also the
-                // last of the pattern vec, pop the trailing delimiter
-                if field.is_empty() {
-                    if idx + 1 == fields {
-                        let _ = new_citekey.pop();
-                    }
-                    break;
-                }
-
-                // process the single slices and add correct delimiter
-                if let Some(field_slice) = split_field.next() {
-                    // Create word slice char by char. We need to loop over chars
-                    // instead of a simple bytes index to also catch chars which
-                    // consist of more than one byte (äöüøæ etc...)
-                    let mut word_slice = String::new();
-                    let word_chars = field_slice.chars();
-                    let mut counter = 0;
-                    for c in word_chars {
-                        if let Some(len) = char_count
-                            && counter == len
-                        {
-                            break;
-                        }
-                        // if a word slice contains a special char, skip it
-                        if IGNORED_SPECIAL_CHARS.contains(&c) {
-                            continue;
-                        }
-                        word_slice.push(c);
-                        counter += 1;
-                    }
-                    // Don't count empty slices and don't add delimiter to those
-                    if !word_slice.is_empty() {
-                        formatted_str = formatted_str + &word_slice;
-                        words_passed += 1;
-                        if word_count == words_passed {
-                            formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
-                            break;
-                        } else {
-                            formatted_str = formatted_str + inner_delimiter.unwrap_or("");
-                        }
-                    } else {
-                        continue;
-                    }
-                } else {
-                    formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
-                    break;
-                };
-            }
-            formatted_str
-        };
-        new_citekey = new_citekey + &formatted_field_str;
-    }
-    if let Some(case_format) = case {
-        match case_format {
-            CitekeyCase::Lower => new_citekey.to_lowercase(),
-            CitekeyCase::Upper => new_citekey.to_uppercase(),
-        }
-    } else {
-        new_citekey
-    }
-}
-
-/// Preformat some fields which are very common to be used in citekeys
-fn preformat_field(field: &str, entry: &Entry) -> String {
-    match field {
-        "title" => {
-            sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("".into()))
-        }
-        "author" => {
-            if let Ok(authors) = entry.author() {
-                let mut last_names = String::new();
-                for a in authors.iter() {
-                    last_names = last_names + &a.name + " ";
-                }
-                last_names
-            } else {
-                "".to_string()
-            }
-        }
-        "year" => {
-            if let Ok(date) = entry.date() {
-                date.to_chunks().format_verbatim()[..4].to_string()
-            } else {
-                entry.get_as::<String>(field).unwrap_or("".into())
-            }
-        }
-        "subtitle" => {
-            sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("".into()))
-        }
-        "editor" => {
-            if let Ok(editors) = entry.editors() {
-                let mut last_names = String::new();
-                for editortypes in editors.iter() {
-                    for e in editortypes.0.iter() {
-                        last_names = last_names + &e.name + " ";
-                    }
-                }
-                last_names
-            } else {
-                "".to_string()
-            }
-        }
-        "pubtype" | "entrytype" => entry.entry_type.to_string(),
-        _ => entry.get_as::<String>(field).unwrap_or("".into()),
-    }
-}
-
-/// Cut of word at char count index if its set
-fn format_word(word: &str, count: Option<usize>) -> String {
-    // Since chars can consist of multiple bytes, we need this more complex
-    // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...)
-    // instead of simple byte indexing
-    let mut word_slice = String::new();
-    let word_chars = word.chars();
-    let mut counter = 0;
-    for c in word_chars {
-        if let Some(len) = count
-            && counter == len
-        {
-            break;
-        }
-        if IGNORED_SPECIAL_CHARS.contains(&c) {
-            continue;
-        }
-        word_slice.push(c);
-        counter += 1;
-    }
-    word_slice
-}
-
-/// Split a formatting pattern of kind
-/// `<field>;<word count>;<char count>;<inside delimiter>;<trailing delimiter>`,
-/// e.g.: `title;3;3;_;:` will give `("title", 3, 3, "_", ":")`
-fn split_formatting_pat(
-    pattern: &str,
-) -> (
-    &str,
-    Option<usize>,
-    Option<usize>,
-    Option<&str>,
-    Option<&str>,
-) {
-    let mut splits = pattern.split(';');
-    (
-        splits
-            .next()
-            .expect("Need field value for formatting citekey"),
-        if let Some(next) = splits.next()
-            && next.len() > 0
-        {
-            next.parse::<usize>().ok()
-        } else {
-            None
-        },
-        if let Some(next) = splits.next()
-            && next.len() > 0
-        {
-            next.parse::<usize>().ok()
-        } else {
-            None
-        },
-        splits.next(),
-        splits.next(),
-    )
-}
-
 #[cfg(test)]
 mod tests {
     use std::path::PathBuf;
 
     use biblatex::Bibliography;
-    use itertools::Itertools;
-
-    use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting, split_formatting_pat};
-
-    #[test]
-    fn split_citekey_pattern() {
-        let pattern = "title;3;5;_;_";
-
-        assert_eq!(
-            split_formatting_pat(pattern),
-            ("title", Some(3), Some(5), Some("_"), Some("_"))
-        );
 
-        let pattern = "year";
-
-        assert_eq!(
-            split_formatting_pat(pattern),
-            ("year", None, None, None, None)
-        );
-
-        let pattern = "author;1;;;_";
-        assert_eq!(
-            split_formatting_pat(pattern),
-            ("author", Some(1), None, Some(""), Some("_"))
-        );
-    }
+    use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting};
 
     #[test]
     fn format_citekey_test() {
diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs
new file mode 100644
index 0000000..ee2c849
--- /dev/null
+++ b/src/bibiman/citekeys/citekey_utils.rs
@@ -0,0 +1,327 @@
+// bibiman - a TUI for managing BibLaTeX databases
+// Copyright (C) 2025  lukeflo
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+/////
+
+use biblatex::{ChunksExt, Entry, Type};
+use indoc::formatdoc;
+use owo_colors::{
+    OwoColorize,
+    colors::{BrightBlue, Green, White},
+};
+
+use crate::{
+    bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully},
+    config::IGNORED_SPECIAL_CHARS,
+};
+
+const IGNORE_WORDS: [&str; 20] = [
+    "the", "a", "an", "of", "for", "in", "at", "to", "and", "der", "die", "das", "ein", "eine",
+    "eines", "des", "auf", "und", "für", "vor",
+];
+
+pub(super) fn formatting_help() {
+    let help = vec![
+        formatdoc!(
+            "{} {}\n",
+            env!("CARGO_PKG_NAME").fg::<Green>().bold(),
+            env!("CARGO_PKG_VERSION")
+        ),
+        formatdoc!("{}", "USAGE".bold()),
+        formatdoc!(
+            "\t{} {} {} {}\n",
+            env!("CARGO_PKG_NAME").fg::<White>().bold(),
+            "format-citekeys".bold(),
+            "--source=<SOURCE>".bold(),
+            "--output=<TARGET>".bold()
+        ),
+        formatdoc!(
+            "
+                \tThis help describes the CLI usage for the citekey formatting
+                \tfunctionality of bibiman. The definition of patterns how the
+                \tcitekeys should be formatted must be set in the config file.
+                \tFor further informations how to use this patterns etc. see:
+                \t{}
+            ",
+            "https://codeberg.org/lukeflo/bibiman/src/branch/main#bibiman"
+                .italic()
+                .fg::<BrightBlue>()
+        ),
+        formatdoc!("{}", "OPTIONS".bold()),
+        formatdoc!(
+            "
+                \t{}
+                \tShow this help and exit
+            ",
+            "-h, --help".fg::<White>().bold()
+        ),
+        formatdoc!(
+            "
+                \t{}
+                \tDon't apply any changes to the named files. Instead print all
+                \told citekeys and the formatted strings that would have been
+                \tapplied in the format: {} => {}
+            ",
+            "-d, --dry-run".fg::<White>().bold(),
+            "old_key".italic(),
+            "new_key".bold()
+        ),
+        formatdoc! {"
+                \t{}
+                \tThe bibfile for which the citekey formatting should be processed.
+                \tTakes a path as argument.
+            ", "-s, -f, --source=, --file=".fg::<White>().bold()},
+        formatdoc!(
+            "
+                \t{}
+                \tThe bibfile to which the updated content should be written.
+                \tTakes a path as argument. If the file doesn't exist, it will be
+                \tcreated.
+                \tIf the argument isn't used, the original file will be {}!
+            ",
+            "-t, -o, --target=, --output=".fg::<White>().bold(),
+            "overwritten".italic(),
+        ),
+    ];
+    let help = help.join("\n");
+    println!("{}", help);
+}
+
+/// Build the citekey from the patterns defined in the config file
+pub(super) fn build_citekey(
+    entry: &Entry,
+    pattern_fields: &[String],
+    case: Option<&CitekeyCase>,
+) -> String {
+    // mut string the citekey is built from
+    let mut new_citekey = String::new();
+
+    // trailing delimiter of previous field
+    let mut trailing_delimiter: Option<&str> = None;
+
+    // loop over pattern fields process them
+    'field_loop: for pattern in pattern_fields.iter() {
+        // parse single values from pattern field
+        let (field_name, word_count, char_count, inner_delimiter, cur_trailing_delimiter) =
+            split_formatting_pat(pattern);
+
+        // built the part of the citekey from the current pattern field
+        let formatted_field_str = {
+            let mut formatted_str = String::new();
+
+            // preformat the field depending on biblatex value
+            let field = preformat_field(field_name, entry);
+
+            // split at whitespaces, count fields and set counter for processed
+            // splits
+            let mut split_field = field.split_whitespace();
+            let mut words_passed = 0;
+            let field_count = field.split_whitespace().count();
+            let word_count = if let Some(val) = word_count
+                && val <= field_count
+            {
+                val
+            } else {
+                field_count
+            };
+
+            // If there is a trailing delimiter from the previous field, push it
+            if let Some(del) = trailing_delimiter {
+                formatted_str = del.to_string();
+            };
+
+            // If the current field isn't empty, set trailing delimiter for
+            // upcoming loop repetition. If it's empty, start next run of loop
+            // directly
+            if !field.is_empty() {
+                trailing_delimiter = cur_trailing_delimiter;
+            } else {
+                continue 'field_loop;
+            }
+
+            // loop over single parts of current field and add correct delimiter
+            'word_loop: loop {
+                // process the single slices and add correct delimiter
+                if let Some(field_slice) = split_field.next() {
+                    // Create word slice char by char. We need to loop over chars
+                    // instead of a simple bytes index to also catch chars which
+                    // consist of more than one byte (äöüøæ etc...)
+                    let mut word_slice = String::new();
+                    let word_chars = field_slice.chars();
+                    let mut counter = 0;
+                    'char_loop: for mut c in word_chars {
+                        // If camelcase is set, force first char of word to uppercase
+                        if counter == 0 && case == Some(&CitekeyCase::Camel) {
+                            c = c.to_ascii_uppercase()
+                        }
+                        if let Some(len) = char_count
+                            && counter == len
+                        {
+                            break 'char_loop;
+                        }
+                        // if a word slice contains a special char, skip it
+                        if IGNORED_SPECIAL_CHARS.contains(&c) {
+                            continue 'char_loop;
+                        }
+                        word_slice.push(c);
+                        counter += 1;
+                    }
+                    // Don't count empty slices and don't add delimiter to those
+                    if !word_slice.is_empty() {
+                        formatted_str = formatted_str + &word_slice;
+                        words_passed += 1;
+                        if word_count == words_passed {
+                            break 'word_loop;
+                        } else {
+                            formatted_str = formatted_str + inner_delimiter.unwrap_or("");
+                        }
+                    } else {
+                        continue 'word_loop;
+                    }
+                } else {
+                    break 'word_loop;
+                };
+            }
+            formatted_str
+        };
+        new_citekey = new_citekey + &formatted_field_str;
+    }
+    match case {
+        Some(CitekeyCase::Lower) => new_citekey.to_lowercase(),
+        Some(CitekeyCase::Upper) => new_citekey.to_uppercase(),
+        _ => new_citekey,
+    }
+}
+
+/// Preformat some fields which are very common to be used in citekeys
+pub(super) fn preformat_field(field: &str, entry: &Entry) -> String {
+    match field {
+        // Sanitize all macro code from string
+        "title" => {
+            sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("".into()))
+        }
+        // Get author names. Fall back to editors before setting empty string
+        "author" => {
+            if let Ok(authors) = entry.author() {
+                let mut last_names = String::new();
+                for a in authors.iter() {
+                    last_names = last_names + &a.name + " ";
+                }
+                last_names
+            } else if let Ok(editors) = entry.editors() {
+                let mut last_names = String::new();
+                for editortypes in editors.iter() {
+                    for e in editortypes.0.iter() {
+                        last_names = last_names + &e.name + " ";
+                    }
+                }
+                last_names
+            } else {
+                "".to_string()
+            }
+        }
+        // Get year of date field, fallback to year field
+        "year" => {
+            if let Ok(date) = entry.date() {
+                date.to_chunks().format_verbatim()[..4].to_string()
+            } else {
+                entry.get_as::<String>(field).unwrap_or("".into())
+            }
+        }
+        // Sanitize all macro code from string
+        "subtitle" => {
+            sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("".into()))
+        }
+        "editor" => {
+            if let Ok(editors) = entry.editors() {
+                let mut last_names = String::new();
+                for editortypes in editors.iter() {
+                    for e in editortypes.0.iter() {
+                        last_names = last_names + &e.name + " ";
+                    }
+                }
+                last_names
+            } else {
+                "".to_string()
+            }
+        }
+        "pubtype" | "entrytype" => entry.entry_type.to_string(),
+        _ => entry.get_as::<String>(field).unwrap_or("".into()),
+    }
+}
+
+/// Split a formatting pattern of kind
+/// `<field>;<word count>;<char count>;<inside delimiter>;<trailing delimiter>`,
+/// e.g.: `title;3;3;_;:` will give `("title", 3, 3, "_", ":")`
+pub(super) fn split_formatting_pat(
+    pattern: &str,
+) -> (
+    &str,
+    Option<usize>,
+    Option<usize>,
+    Option<&str>,
+    Option<&str>,
+) {
+    let mut splits = pattern.split(';');
+    (
+        splits
+            .next()
+            .expect("Need field value for formatting citekey"),
+        if let Some(next) = splits.next()
+            && next.len() > 0
+        {
+            next.parse::<usize>().ok()
+        } else {
+            None
+        },
+        if let Some(next) = splits.next()
+            && next.len() > 0
+        {
+            next.parse::<usize>().ok()
+        } else {
+            None
+        },
+        splits.next(),
+        splits.next(),
+    )
+}
+
+#[cfg(test)]
+mod test {
+    use crate::bibiman::citekeys::citekey_utils::split_formatting_pat;
+
+    #[test]
+    fn split_citekey_pattern() {
+        let pattern = "title;3;5;_;_";
+
+        assert_eq!(
+            split_formatting_pat(pattern),
+            ("title", Some(3), Some(5), Some("_"), Some("_"))
+        );
+
+        let pattern = "year";
+
+        assert_eq!(
+            split_formatting_pat(pattern),
+            ("year", None, None, None, None)
+        );
+
+        let pattern = "author;1;;;_";
+        assert_eq!(
+            split_formatting_pat(pattern),
+            ("author", Some(1), None, Some(""), Some("_"))
+        );
+    }
+}
diff --git a/tests/test-config.toml b/tests/test-config.toml
index 2c5ac96..d3e42c5 100644
--- a/tests/test-config.toml
+++ b/tests/test-config.toml
@@ -61,5 +61,10 @@ custom_column = "series"
 # year_color = "135"
 
 [citekey_formatter]
-fields = [ "author;2;;-;_", "title;3;6;_;_", "year", "publisher;;5;#;" ]
+fields = ["entrytype;;;;:", "author;2;;-;_", "title;3;6;_;_", "year", "publisher;;5;#;" ]
+# fields = [ # CamelCase test
+#   "author;2;;;",
+#   "title;5;5;;",
+#   "year"
+# ]
 case = "lowercase"
-- 
cgit v1.2.3


From 467851007e1861834326deee3116aa88fe839f5a Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Mon, 13 Oct 2025 15:45:53 +0200
Subject: Working proof of concept of citekey formatting

---
 CITEKEYS.md                           | 215 +++++++++++++++
 Cargo.lock                            |   7 +
 Cargo.toml                            |   1 +
 README.md                             |  20 ++
 src/bibiman/citekeys.rs               |  69 +++--
 src/bibiman/citekeys/citekey_utils.rs | 105 ++++----
 src/config.rs                         |  69 +++++
 tests/biblatex-test-citekeys.bib      | 476 ++++++++++++++++++++++++++++++++++
 tests/test-config.toml                |   5 +-
 9 files changed, 889 insertions(+), 78 deletions(-)
 create mode 100644 CITEKEYS.md
 create mode 100644 tests/biblatex-test-citekeys.bib

(limited to 'src')

diff --git a/CITEKEYS.md b/CITEKEYS.md
new file mode 100644
index 0000000..912326a
--- /dev/null
+++ b/CITEKEYS.md
@@ -0,0 +1,215 @@
+# Formatting Citekeys<a name="formatting-citekeys"></a>
+
+<!-- mdformat-toc start --slug=github --maxlevel=6 --minlevel=1 -->
+
+- [Formatting Citekeys](#formatting-citekeys)
+  - [Settings](#settings)
+  - [Building Patterns](#building-patterns)
+  - [Ignore Lists and Char Case](#ignore-lists-and-char-case)
+  - [General Tips](#general-tipps)
+  - [Examples](#examples)
+
+<!-- mdformat-toc end -->
+
+`bibiman` offers the possibility to create new citekeys from the fields of
+BibLaTeX entries. This is done using an easy but powerful pattern-matching
+syntax.
+
+## Settings<a name="settings"></a>
+
+All settings for citekey generation have to be configured in the config file
+that is in use. The default path is `XDG_CONFIG_DIR/bibiman/bibiman.toml`, but
+it can be overridden with the global `-c`/`--config=` option.
+
+The following values can be set through the config file. A detailed explanation
+of all fields follows below:
+
+```toml
+[citekey_formatter]
+fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ]
+case = "lowercase"
+ascii_only = true
+ignored_chars = [
+    "?", "!", "\\", "\'", ".", "-", "–", ":", ",", "[", "]", "(", ")", "{", "}", "§", "$", "%", "&", "/", "`", "´", "#", "+", "*", "=", "|", "<", ">", "^", "°", "_", "\"",
+]
+ignored_words = [
+    "the",
+    "a",
+    "an",
+    "of",
+    "for",
+    "in",
+    "at",
+    "to",
+    "and",
+    "der",
+    "die",
+    "das",
+    "ein",
+    "eine",
+    "eines",
+    "des",
+    "auf",
+    "und",
+    "für",
+    "vor",
+]
+```
+
+## Building Patterns<a name="building-patterns"></a>
+
+The main aspect for generating citekeys are the field patterns. They can be set
+through an array in the config file where every array-item represents a single
+BibLaTeX field to be used for generating a part of the citekey.
+
+Every field pattern consists of the following five parts separated by
+semicolons. The general pattern looks like this (every subfield is explained
+below):
+
+*biblatex field name* **;** *max word count* **;** *max char count* **;** *inner delimiter* **;** *trailing delimiter*
+
+- **BibLaTeX field**: the first part is the name of the field whose value
+  should be used to generate this part of the citekey. Theoretically, any
+  BibLaTeX field can be selected by name. But there are some fields which are
+  much more common than others; e.g. `author`, `editor`, `title`, `year`/`date`
+  or `entrytype`. Those very common fields are preprocessed; meaning that for
+  instance LaTeX macros are fully stripped from the strings, or that `editor` is
+  a fallback value for `author` if the latter is empty (however, setting
+  `editor` explicitly is still possible). Also using `year` will parse the
+  `date` field too, to ensure a year number.
+- **Max Word**: Defines the maximum number of words that will be used from the
+  named field. E.g. if the title consists of five words and the max counter is
+  set to `3`, only the first three words will be used.
+- **Max Chars/Word**: Defines how many chars, counting from the start, of each
+  word will be used to build the citekey. If for instance the value is set to
+  `5`, only the first five chars of any word will be used. Thus, "archaeology"
+  would be stripped down to "archa".
+- **Inner Delimiter**: Sets the delimiter char used between words from the
+  currently named field; e.g. to separate the words of the `title` field.
+- **Trailing Delimiter**: Sets the delimiter which separates the current field's
+  value from the following one. This delimiter is only printed if the following
+  field has some content.
+
+For example, to use the `title` field, print at most three words and only the
+first five chars of each, separate single words by an underscore and terminate
+the whole field with an equal sign, insert the following pattern field into the
+`fields` array:
+
+`title;3;5;_;=`
+
+Except the BibLaTeX field name, all other parts of the pattern can be left
+blank. If the field name is the only value set, semicolon delimiters are also
+not necessary. But if only one of the following parts should be set, all
+delimiters need to be used. E.g. those are both valid: `title` or `title;;;_;=`.
+The first would print all words of the title, no matter the length, not
+separated by any char. The last would also print all words of the title, but
+single words separated by underscores and the whole pattern value separated from
+the following by an equal sign. This is not valid: `title;;_` since `bibiman`
+can't know if the underscore means a delimiter (and which) or the max char
+count.
+
+The pattern array inside the config file takes multiple pattern fields like the
+preceding one. This allows an elaborate citekey pattern which takes into account
+multiple fields.
+
+## Ignore Lists and Char Case<a name="ignore-lists-and-char-case"></a>
+
+Beside the field patterns there are some other options to define how citekeys
+should be built.
+
+`ascii_only=<BOOL>`
+: If set to `true`, which is the default, non-ascii chars are mapped to their
+  ascii equivalent. For example, the German `ä` would be mapped to `a`. The
+  Turkish `ş` or Greek `σ`/`ς` would be mapped to `s`. If set to `false` all are
+  kept as they are. But this could lead to errors running LaTeX on the file.
+
+`case=<CASE>`
+: If used, sets the case of the chars in the citekey. Valid values are
+  `uppercase`, `lowercase` or `camelcase`. The first two should be clear; the
+  latter means typical camel case that also begins the *first word* with an
+  uppercase letter, also known as upper camel case or Pascal case.
+
+`ignored_chars=<ARRAY>`
+: Defines chars which should be ignored during parsing (i.e. not printed).
+  The default list contains 33 special chars and is part of the default config
+  file (in commented-out state). Be aware, setting this key will completely
+  overwrite the default list!
+
+`ignored_words=<ARRAY>`
+: A list of words which should be ignored when parsing field values. The default
+  list contains about 20 very commonly used words in English and German; like
+  articles, pronouns or connector words. As with `ignored_chars`, setting this
+  key will completely overwrite the default list!
+
+## General Tips<a name="general-tipps"></a>
+
+- Most importantly: *always use the **`--dry-run`** option first*! This will
+  print a list of old and new values for all citekeys in the file without
+  changing anything.
+- After finding a good overall pattern, *use the `--output=` option* to create a
+  new file and don't overwrite your existing file. Thus, your original file
+  isn't broken if the key formatter produces some unwanted output.
+- Even though very long patterns are possible, they are not encouraged, since
+  they bloat the bibfiles.
+- The same applies to *too short* patterns; if the pattern is too unspecific,
+  it bears the risk of producing duplicates (e.g. single author and year only).
+  But the citekey generator will not check for duplicates!
+- It is possible to keep special chars and use them as delimiters. But this
+  might cause problems with other programs, and CLI tools in particular, since
+  many special chars are reserved for shell operations. For instance, it will
+  very likely break the note file feature of `bibiman` which doesn't accept many
+  special chars.
+
+## Examples<a name="examples"></a>
+
+To make the process clearer, a few examples might help. The following bibfile is
+assumed:
+
+```latex
+@article{Bos2023,
+    title         = {{LaTeX}, metadata, and publishing workflows},
+    author        = {Bos, Joppe W. and {McCurley}, Kevin S.},
+    year          = {2023},
+    month         = apr,
+    journal       = {arXiv},
+    number        = {{arXiv}:2301.08277},
+    doi           = {10.48550/arXiv.2301.08277},
+    url           = {http://arxiv.org/abs/2301.08277},
+    urldate       = {2023-08-22},
+    note          = {type: article},
+}
+@book{Bhambra2021,
+    title         = {Colonialism and \textbf{Modern Social Theory}},
+    author        = {Bhambra, Gurminder K. and Holmwood, John},
+    location      = {Cambridge and Medford},
+    publisher     = {Polity Press},
+    date          = {2021},
+}
+```
+
+And the following values set in the config file:
+
+```toml
+fields = [
+  # Just print the whole entrytype and a colon as trailing delimiter
+  "entrytype;;;;:", 
+  # Print all author names in full length, names separated by dash,
+  # the whole field by underscore
+  "author;;;-;_", 
+  # Print first 4 words of title, first 3 chars of every word only. Title words
+  # separated by equal sign, the whole field by underscore
+  "title;4;3;=;_", 
+  # Print all words of location, but only first 4 chars of every word. Single words
+  # separated by colon, whole field by underscore
+  "location;;4;:;_", 
+  # Just print the whole year
+  "year",
+]
+case = "lowercase"
+ascii_only = true
+```
+
+The combination of those settings will produce the following citekeys:
+
+- **`article:bos-mccurley_lat=met=pub=wor_2023`**
+- **`book:bhambra-holmwood_col=mod=soc=the_camb:medf_2021`**
diff --git a/Cargo.lock b/Cargo.lock
index a27636e..0adb4e7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -103,6 +103,7 @@ dependencies = [
  "biblatex",
  "color-eyre",
  "crossterm",
+ "deunicode",
  "dirs",
  "editor-command",
  "figment",
@@ -323,6 +324,12 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "deunicode"
+version = "1.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04"
+
 [[package]]
 name = "dirs"
 version = "5.0.1"
diff --git a/Cargo.toml b/Cargo.toml
index abf1eee..0c07c51 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -41,6 +41,7 @@ owo-colors = "4.2.2"
 logos = "0.15.1"
 phf = { version = "0.13.1", features = ["macros"] }
 indoc = "2.0.6"
+deunicode = "1.6.2"
 
 [workspace.metadata.cross.target.aarch64-unknown-linux-gnu]
 # Install libssl-dev:arm64, see <https://github.com/cross-rs/cross/blob/main/docs/custom_images.md#adding-dependencies-to-existing-images>
diff --git a/README.md b/README.md
index 4929509..3fb81c8 100644
--- a/README.md
+++ b/README.md
@@ -24,9 +24,11 @@
     - [Ubuntu/Debian](#ubuntudebian)
     - [Void Linux](#void-linux)
   - [Usage](#usage)
+    - [CLI for citekey formatting](#cli-for-citekey-formatting)
   - [Configuration](#configuration)
     - [Location of Config File](#location-of-config-file)
     - [General Configuration](#general-configuration)
+    - [Citekey formatting](#citekey-formatting)
     - [Color Configuration](#color-configuration)
   - [Features](#features)
   - [Keybindings](#keybindings)
@@ -196,6 +198,13 @@ bibman tests/multi-files/
 bibiman tests/biblatex-test.bib tests/multi-files/
 ```
 
+### CLI for citekey formatting<a name="cli-for-citekey-formatting"></a>
+
+Besides the TUI, `bibiman` can format and replace citekeys. To make use of this
+feature run the program with the `format-citekeys` subcommand. For more
+information on this use `bibiman format-citekeys --help` and the
+[docs](./CITEKEYS.md).
+
 ## Configuration<a name="configuration"></a>
 
 ### Location of Config File<a name="location-of-config-file"></a>
@@ -268,6 +277,11 @@ note_symbol = "󰧮"
 ## Possible values are "journaltitle", "organization", "instituion", "publisher"
 ## and "pubtype" (which is the default)
 custom_column = "pubtype"
+
+[citekey_formatter]
+fields = []
+ascii_only = true
+case = "lowercase"
 ```
 
 `bibfiles`
@@ -326,6 +340,12 @@ custom_column = "pubtype"
   good advice to use a rather wide terminal window when using a value like
   `journaltitle`.
 
+### Citekey formatting<a name="citekey-formatting"></a>
+
+`bibiman` now also offers a citekey generating feature. This makes it possible
+to reformat all citekeys based on an elaborate pattern-matching syntax. For
+further information and examples see the [docs](CITEKEYS.md).
+
 ### Color Configuration<a name="color-configuration"></a>
 
 Furthermore, it is now possible to customize the colors. The following values
diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index 2f56947..0cec28e 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -29,7 +29,7 @@ use serde::{Deserialize, Serialize};
 
 use crate::{
     bibiman::citekeys::citekey_utils::{build_citekey, formatting_help},
-    config::BibiConfig,
+    config::{BibiConfig, IGNORED_SPECIAL_CHARS, IGNORED_WORDS},
 };
 
 mod citekey_utils;
@@ -60,6 +60,7 @@ pub(crate) struct CitekeyFormatting {
     case: Option<CitekeyCase>,
     old_new_keys_map: Vec<(String, String)>,
     dry_run: bool,
+    ascii_only: bool,
 }
 
 impl CitekeyFormatting {
@@ -69,14 +70,15 @@ impl CitekeyFormatting {
     ) -> color_eyre::Result<()> {
         let mut formatter = CitekeyFormatting::default();
 
-        formatter.fields = cfg
-            .citekey_formatter
-            .fields
-            .clone()
-            .ok_or_eyre("Need to define fields correctly in config file")?;
+        formatter.fields = cfg.citekey_formatter.fields.clone().ok_or_eyre(format!(
+            "Need to define {} correctly in config file",
+            "citekey pattern fields".red()
+        ))?;
 
         formatter.case = cfg.citekey_formatter.case.clone();
 
+        formatter.ascii_only = cfg.citekey_formatter.ascii_only;
+
         if formatter.fields.is_empty() {
             return Err(eyre!(
                 "To format all citekeys, you need to provide {} values in the config file",
@@ -105,13 +107,26 @@ impl CitekeyFormatting {
         formatter.bib_entries = Bibliography::parse(&bibstring)
             .map_err(|e| eyre!("Couldn't parse bibfile due to {}", e.kind))?;
 
+        let ignored_chars = if let Some(chars) = &cfg.citekey_formatter.ignored_chars {
+            chars.as_slice()
+        } else {
+            IGNORED_SPECIAL_CHARS.as_slice()
+        };
+
+        let ignored_words = if let Some(words) = &cfg.citekey_formatter.ignored_words {
+            words.as_slice()
+        } else {
+            &*IGNORED_WORDS.as_slice()
+        };
+
         formatter
-            .do_formatting()
+            .do_formatting(ignored_chars, ignored_words)
             .rev_sort_new_keys_by_len()
             .update_file()?;
 
         Ok(())
     }
+
     /// Start Citekey formatting with building a new instance of `CitekeyFormatting`
     /// Formatting is processed file by file, because `bibman` can handle
     /// multi-file setups.
@@ -144,16 +159,24 @@ impl CitekeyFormatting {
             case: cfg.citekey_formatter.case.clone(),
             old_new_keys_map: Vec::new(),
             dry_run: false,
+            ascii_only: cfg.citekey_formatter.ascii_only,
         })
     }
 
     /// Process the actual formatting. The citekey of every entry will be updated.
-    pub fn do_formatting(&mut self) -> &mut Self {
+    pub fn do_formatting(&mut self, ignored_chars: &[char], ignored_words: &[String]) -> &mut Self {
         let mut old_new_keys: Vec<(String, String)> = Vec::new();
         for entry in self.bib_entries.iter() {
             old_new_keys.push((
                 entry.key.clone(),
-                build_citekey(entry, &self.fields, self.case.as_ref()),
+                build_citekey(
+                    entry,
+                    &self.fields,
+                    self.case.as_ref(),
+                    self.ascii_only,
+                    ignored_chars,
+                    ignored_words,
+                ),
             ));
         }
 
@@ -215,12 +238,15 @@ mod tests {
 
     use biblatex::Bibliography;
 
-    use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting};
+    use crate::{
+        bibiman::citekeys::{CitekeyCase, CitekeyFormatting},
+        config::{IGNORED_SPECIAL_CHARS, IGNORED_WORDS},
+    };
 
     #[test]
     fn format_citekey_test() {
         let src = r"
-        @article{bos_latex_metadata_and_publishing_workflows_2023,
+        @article{Bos2023,
             title         = {{LaTeX}, metadata, and publishing workflows},
             author        = {Bos, Joppe W. and {McCurley}, Kevin S.},
             year          = {2023},
@@ -232,7 +258,7 @@ mod tests {
             urldate       = {2023-08-22},
             note          = {type: article},
         }
-        @book{bhambra_colonialism_social_theory_2021,
+        @book{Bhambra2021,
             title         = {Colonialism and \textbf{Modern Social Theory}},
             author        = {Bhambra, Gurminder K. and Holmwood, John},
             location      = {Cambridge and Medford},
@@ -247,29 +273,24 @@ mod tests {
             fields: vec![
                 "entrytype;;;;:".into(),
                 "author;;;-;_".into(),
-                "title;4;3;_;_".into(),
+                "title;4;3;=;_".into(),
                 "location;;4;:;_".into(),
                 "year".into(),
             ],
-            case: None,
+            case: Some(CitekeyCase::Lower),
             old_new_keys_map: Vec::new(),
             dry_run: false,
+            ascii_only: true,
         };
-        let _ = formatting_struct.do_formatting();
+        let _ = formatting_struct
+            .do_formatting(IGNORED_SPECIAL_CHARS.as_slice(), &*IGNORED_WORDS.as_slice());
         assert_eq!(
             formatting_struct.old_new_keys_map.get(0).unwrap().1,
-            "article:Bos-McCurley_LaT_met_and_pub_Empt_2023"
+            "article:bos-mccurley_lat=met=pub=wor_2023"
         );
         assert_eq!(
             formatting_struct.old_new_keys_map.get(1).unwrap().1,
-            "book:Bhambra-Holmwood_Col_and_Mod_Soc_Camb:and:Medf_2021"
-        );
-        formatting_struct.case = Some(CitekeyCase::Lower);
-        let _ = formatting_struct.do_formatting().rev_sort_new_keys_by_len();
-        // now the longer citekey is processed first and its in lowercase!
-        assert_eq!(
-            formatting_struct.old_new_keys_map.get(0).unwrap().1,
-            "book:bhambra-holmwood_col_and_mod_soc_camb:and:medf_2021"
+            "book:bhambra-holmwood_col=mod=soc=the_camb:medf_2021"
         );
     }
 
diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs
index ee2c849..5f70224 100644
--- a/src/bibiman/citekeys/citekey_utils.rs
+++ b/src/bibiman/citekeys/citekey_utils.rs
@@ -16,21 +16,14 @@
 /////
 
 use biblatex::{ChunksExt, Entry, Type};
+use deunicode::deunicode;
 use indoc::formatdoc;
 use owo_colors::{
     OwoColorize,
     colors::{BrightBlue, Green, White},
 };
 
-use crate::{
-    bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully},
-    config::IGNORED_SPECIAL_CHARS,
-};
-
-const IGNORE_WORDS: [&str; 20] = [
-    "the", "a", "an", "of", "for", "in", "at", "to", "and", "der", "die", "das", "ein", "eine",
-    "eines", "des", "auf", "und", "für", "vor",
-];
+use crate::bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully};
 
 pub(super) fn formatting_help() {
     let help = vec![
@@ -104,6 +97,9 @@ pub(super) fn build_citekey(
     entry: &Entry,
     pattern_fields: &[String],
     case: Option<&CitekeyCase>,
+    ascii_only: bool,
+    ignored_chars: &[char],
+    ignored_words: &[String],
 ) -> String {
     // mut string the citekey is built from
     let mut new_citekey = String::new();
@@ -114,7 +110,7 @@ pub(super) fn build_citekey(
     // loop over pattern fields process them
     'field_loop: for pattern in pattern_fields.iter() {
         // parse single values from pattern field
-        let (field_name, word_count, char_count, inner_delimiter, cur_trailing_delimiter) =
+        let (field_name, max_words, max_chars, inner_delimiter, cur_trailing_delimiter) =
             split_formatting_pat(pattern);
 
         // built the part of the citekey from the current pattern field
@@ -126,16 +122,9 @@ pub(super) fn build_citekey(
 
             // split at whitespaces, count fields and set counter for processed
             // splits
-            let mut split_field = field.split_whitespace();
+            let split_field = field.split_whitespace();
             let mut words_passed = 0;
             let field_count = field.split_whitespace().count();
-            let word_count = if let Some(val) = word_count
-                && val <= field_count
-            {
-                val
-            } else {
-                field_count
-            };
 
             // If there is a trailing delimiter from the previous field, push it
             if let Some(del) = trailing_delimiter {
@@ -152,47 +141,57 @@ pub(super) fn build_citekey(
             }
 
             // loop over single parts of current field and add correct delimiter
-            'word_loop: loop {
-                // process the single slices and add correct delimiter
-                if let Some(field_slice) = split_field.next() {
-                    // Create word slice char by char. We need to loop over chars
-                    // instead of a simple bytes index to also catch chars which
-                    // consist of more than one byte (äöüøæ etc...)
-                    let mut word_slice = String::new();
-                    let word_chars = field_slice.chars();
-                    let mut counter = 0;
-                    'char_loop: for mut c in word_chars {
-                        // If camelcase is set, force first char of word to uppercase
-                        if counter == 0 && case == Some(&CitekeyCase::Camel) {
-                            c = c.to_ascii_uppercase()
-                        }
-                        if let Some(len) = char_count
-                            && counter == len
-                        {
-                            break 'char_loop;
-                        }
-                        // if a word slice contains a special char, skip it
-                        if IGNORED_SPECIAL_CHARS.contains(&c) {
-                            continue 'char_loop;
-                        }
+            // process the single slices and add correct delimiter
+            'word_loop: for (idx, field_slice) in split_field.enumerate() {
+                // if the current slice is a common word from the ignore list,
+                // skip it.
+                if ignored_words.contains(&field_slice.to_lowercase()) {
+                    continue;
+                }
+
+                // Create word slice char by char. We need to loop over chars
+                // instead of a simple bytes index to also catch chars which
+                // consist of more than one byte (äöüøæ etc...)
+                let mut word_slice = String::new();
+                let word_chars = field_slice.chars();
+                let mut counter = 0;
+                'char_loop: for mut c in word_chars {
+                    // If camelcase is set, force first char of word to uppercase
+                    if counter == 0 && case == Some(&CitekeyCase::Camel) {
+                        c = c.to_ascii_uppercase()
+                    }
+                    if let Some(len) = max_chars
+                        && counter >= len
+                    {
+                        break 'char_loop;
+                    }
+                    // if a word slice contains a special char, skip it
+                    if ignored_chars.contains(&c) {
+                        continue 'char_loop;
+                    }
+                    // if non-ascii chars should be mapped, check if needed and do it
+                    if let Some(chars) = deunicode::deunicode_char(c)
+                        && ascii_only
+                    {
+                        word_slice.push_str(chars);
+                        counter += chars.len();
+                    } else {
                         word_slice.push(c);
                         counter += 1;
                     }
-                    // Don't count empty slices and don't add delimiter to those
-                    if !word_slice.is_empty() {
-                        formatted_str = formatted_str + &word_slice;
-                        words_passed += 1;
-                        if word_count == words_passed {
-                            break 'word_loop;
-                        } else {
-                            formatted_str = formatted_str + inner_delimiter.unwrap_or("");
-                        }
+                }
+                // Don't count empty slices and don't add delimiter to those
+                if !word_slice.is_empty() {
+                    formatted_str = formatted_str + &word_slice;
+                    words_passed += 1;
+                    if max_words.is_some_and(|max| max == words_passed) || idx + 1 == field_count {
+                        break 'word_loop;
                     } else {
-                        continue 'word_loop;
+                        formatted_str = formatted_str + inner_delimiter.unwrap_or("");
                     }
                 } else {
-                    break 'word_loop;
-                };
+                    continue 'word_loop;
+                }
             }
             formatted_str
         };
diff --git a/src/config.rs b/src/config.rs
index b1c4b07..7c1a0f8 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -20,6 +20,7 @@ use std::{
     io::{Write, stdin},
     path::PathBuf,
     str::FromStr,
+    sync::LazyLock,
 };
 
 use color_eyre::{eyre::Result, owo_colors::OwoColorize};
@@ -40,6 +41,31 @@ pub const IGNORED_SPECIAL_CHARS: [char; 33] = [
     '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"',
 ];
 
+pub static IGNORED_WORDS: LazyLock<Vec<String>> = LazyLock::new(|| {
+    vec![
+        String::from("the"),
+        String::from("a"),
+        String::from("an"),
+        String::from("of"),
+        String::from("for"),
+        String::from("in"),
+        String::from("at"),
+        String::from("to"),
+        String::from("and"),
+        String::from("der"),
+        String::from("die"),
+        String::from("das"),
+        String::from("ein"),
+        String::from("eine"),
+        String::from("eines"),
+        String::from("des"),
+        String::from("auf"),
+        String::from("und"),
+        String::from("für"),
+        String::from("vor"),
+    ]
+});
+
 const DEFAULT_CONFIG: &str = r##"
 # [general]
 ## Default files/dirs which are loaded on startup
@@ -118,6 +144,40 @@ const DEFAULT_CONFIG: &str = r##"
 ## Convert chars to specified case. Possible values:
 ## "upper", "uppercase", "lower", "lowercase"
 # case = "lowercase"
+
+## Map all unicode chars to their pure ascii equivalent
+# ascii_only = true
+
+## List of special chars that'll be ignored when building citekeys.
+## A custom list will overwrite the default list
+# ignored_chars = [
+#     "?", "!", "\\", "'", ".", "-", "–", ":", ",", "[", "]", "(", ")", "{", "}", "§", "$", "%", "&", "/", "`", "´", "#", "+", "*", "=", "|", "<", ">", "^", "°", "_", "\"",
+# ]
+
+## List of words that'll be ignored when building citekeys.
+## A custom list will overwrite the default list
+# ignored_words = [
+#     "the",
+#     "a",
+#     "an",
+#     "of",
+#     "for",
+#     "in",
+#     "at",
+#     "to",
+#     "and",
+#     "der",
+#     "die",
+#     "das",
+#     "ein",
+#     "eine",
+#     "eines",
+#     "des",
+#     "auf",
+#     "und",
+#     "für",
+#     "vor",
+# ]
 "##;
 
 /// Main struct of the config file. Contains substructs/headings in toml
@@ -171,6 +231,9 @@ pub struct Colors {
 pub struct CitekeyFormatter {
     pub fields: Option<Vec<String>>,
     pub case: Option<CitekeyCase>,
+    pub ascii_only: bool,
+    pub ignored_chars: Option<Vec<char>>,
+    pub ignored_words: Option<Vec<String>>,
 }
 
 impl Default for BibiConfig {
@@ -194,6 +257,9 @@ impl Default for BibiConfig {
             citekey_formatter: CitekeyFormatter {
                 fields: None,
                 case: None,
+                ascii_only: true,
+                ignored_chars: None,
+                ignored_words: None,
             },
         }
     }
@@ -224,6 +290,9 @@ impl BibiConfig {
             citekey_formatter: CitekeyFormatter {
                 fields: None,
                 case: None,
+                ascii_only: true,
+                ignored_chars: None,
+                ignored_words: None,
             },
         }
     }
diff --git a/tests/biblatex-test-citekeys.bib b/tests/biblatex-test-citekeys.bib
new file mode 100644
index 0000000..9767f97
--- /dev/null
+++ b/tests/biblatex-test-citekeys.bib
@@ -0,0 +1,476 @@
+@set{set,
+    entryset = {article:herrmann-ofele_carboc=carben=as_2006,article:aksin-turkmen_effect=immobi=on_2006,article:yoon-ryu_pallad=pincer=comple_2006},
+    annotation = {A \texttt{set} with three members.},
+}
+
+@set{set,
+    entryset = {article:glashow_partia=symmet=weak_1961,article:weinberg_model=lepton_1967,salam},
+    annotation = {A \texttt{set} with three members discussing the standard
+                  model of particle physics.},
+}
+
+@collection{collection:matuz-miller_contem=litera=critic_1990gale,
+    title = {Contemporary Literary Criticism},
+    year = {1990},
+    location = {Detroit},
+    publisher = {Gale},
+    volume = {61},
+    pages = {204--208},
+    editor = {Matuz, Roger and Miller, Helen},
+    keywords = {narration},
+    langid = {english},
+    langidopts = {variant=american},
+    annotation = {A \texttt{collection} entry providing the excerpt information
+                  for the \texttt{article:doody_heming=style=jakes_1974} entry. Note the format of the \texttt{
+                  pages} field},
+}
+
+@article{article:aksin-turkmen_effect=immobi=on_2006,
+    title = {Effect of immobilization on catalytic characteristics of saturated
+             {Pd-N}-heterocyclic carbenes in {Mizoroki-Heck} reactions},
+    author = {Aks{\i}n, {\"O}zge and T{\"u}rkmen, Hayati and Artok , Levent and
+              { \c{C}}etinkaya, Bekir and Ni, Chaoying and B{\" u}y{ \"u}kg{\"u}
+              ng{ \" o}r, Orhan and {\"O}zkal, Erhan},
+    volume = {691},
+    number = {13},
+    pages = {3027--3036},
+    journaltitle = jomch,
+    date = {2006},
+    indextitle = {Effect of immobilization on catalytic characteristics},
+}
+
+@article{article:angenendt_honore=salvat=vom_2002,
+    title = {In Honore Salvatoris~-- Vom Sinn und Unsinn der Patrozinienkunde},
+    shorttitle = {In Honore Salvatoris},
+    author = {Angenendt, Arnold},
+    volume = {97},
+    pages = {431--456, 791--823},
+    journaltitle = {Revue d'Histoire Eccl{\'e}siastique},
+    date = {2002},
+    langid = {german},
+    indextitle = {In Honore Salvatoris},
+    annotation = {A German article in a French journal. Apart from that, a
+                  typical \texttt{article} entry. Note the \texttt{indextitle}
+                  field},
+}
+
+@book{book:aristotle_de=anima_1907cambr#unive#press,
+    title = {De Anima},
+    author = {Aristotle},
+    location = {Cambridge},
+    publisher = cup,
+    date = {1907},
+    editor = {Hicks, Robert Drew},
+    keywords = {primary, ancient, philosophy, athens},
+    langid = {english},
+    langidopts = {variant=british},
+    annotation = {A \texttt{book} entry with an \texttt{author} and an \texttt{
+                  editor}},
+}
+
+@book{book:aristotle_physic_1929g#p#putna,
+    title = {Physics},
+    shorttitle = {Physics},
+    author = {Aristotle},
+    location = {New York},
+    publisher = {G. P. Putnam},
+    url = {https://www.infobooks.org/authors/classic/aristotle-books/#Physic},
+    date = {1929},
+    translator = {Wicksteed, P. H. and Cornford, F. M.},
+    keywords = {primary, ancient, philosophy},
+    langid = {english},
+    langidopts = {variant=american},
+    file = {~/Documents/coding/projects/bibiman/tests/book:aristotle_physic_1929g#p#putna.pdf},
+    annotation = {A \texttt{book} entry with a \texttt{translator} field},
+    abstract = {The Physics is a work by Aristotle dedicated to the study of
+                nature. Regarded by Heidegger as "the fundamental work of Western
+                philosophy", it presents the renowned distinction between the
+                four types of cause, as well as reflections on chance, motion,
+                infinity, and other fundamental concepts. It is here that
+                Aristotle sets out his celebrated paradox of time.},
+}
+
+@book{book:aristotle_poetic_1968clare#press,
+    title = {Poetics},
+    shorttitle = {Poetics},
+    author = {Aristotle},
+    location = {Oxford},
+    publisher = {Clarendon Press},
+    series = {Clarendon {Aristotle}},
+    date = {1968},
+    editor = {Lucas, D. W.},
+    keywords = {primary},
+    langid = {english},
+    langidopts = {variant=british},
+    annotation = {A \texttt{book} entry with an \texttt{author} and an \texttt{
+                  editor} as well as a \texttt{series} field},
+}
+
+@mvbook{mvbook:aristotle_rhetor=aristo=with_1877cambr#unive#press,
+    title = {The \textbf{Rhetoric} of {Aristotle} with a commentary by the late {Edward
+             Meredith Cope}},
+    shorttitle = {Rhetoric},
+    author = {Aristotle},
+    publisher = cup,
+    date = {1877},
+    editor = {Cope, Edward Meredith},
+    commentator = {Cope, Edward Meredith},
+    volumes = {3},
+    keywords = {primary},
+    langid = {english},
+    langidopts = {variant=british},
+    sorttitle = {Rhetoric of Aristotle},
+    indextitle = {Rhetoric of {Aristotle}, The},
+    annotation = {A commented edition. Note the concatenation of the \texttt{
+                  editor} and \texttt{commentator} fields as well as the \texttt{
+                  volumes}, \texttt{sorttitle}, and \texttt{indextitle} fields},
+}
+
+@book{book:augustine_hetero=cataly=synthe_1995marce#dekke,
+    title = {Heterogeneous catalysis for the synthetic \textit{chemist}},
+    shorttitle = {Heterogeneous catalysis},
+    author = {Augustine, Robert L.},
+    location = {New York},
+    publisher = {Marcel Dekker},
+    date = {1995},
+    langid = {english},
+    langidopts = {variant=american},
+    annotation = {A plain \texttt{book} entry},
+    keywords = {chemistry},
+}
+
+@book{book:averroes_epistl=on=possib_1982jewis#theol#semin#ameri,
+    title = {The Epistle on the Possibility of Conjunction with the Active
+             Intellect by {Ibn Rushd} with the Commentary of {Moses Narboni}},
+    shorttitle = {Possibility of Conjunction},
+    author = {Averroes},
+    location = {New York},
+    publisher = {Jewish Theological Seminary of America},
+    series = {Moreshet: Studies in {Jewish} History, Literature and Thought},
+    number = {7},
+    date = {1982},
+    editor = {Bland, Kalman P.},
+    translator = {Bland, Kalman P.},
+    keywords = {primary},
+    langid = {english},
+    langidopts = {variant=american},
+    indextitle = {Epistle on the Possibility of Conjunction, The},
+    annotation = {A \texttt{book} entry with a \texttt{series} and a \texttt{
+                  number}. Note the concatenation of the \texttt{editor} and
+                  \texttt{translator} fields as well as the \texttt{indextitle}
+                  field},
+}
+
+@article{article:baez-lauda_higher=algebr=v_2004,
+    title = {Higher-Dimensional Algebra {V}: 2-Groups},
+    author = {Baez, John C. and Lauda, Aaron D.},
+    volume = {12},
+    pages = {423--491},
+    journaltitle = {Theory and Applications of Categories},
+    date = {2004},
+    version = {3},
+    eprint = {math/0307200v3},
+    eprinttype = {arxiv},
+    langid = {english},
+    keywords = {math},
+    langidopts = {variant=american},
+    annotation = {An \texttt{article} with \texttt{eprint} and \texttt{
+                  eprinttype} fields. Note that the arXiv reference is
+                  transformed into a clickable link if \texttt{hyperref} support
+                  has been enabled. Compare \texttt{baez\slash online}, which is
+                  the same item given as an \texttt{online} entry},
+}
+
+@article{article:bertram-wentworth_gromov=invari=holomo_1996,
+    title = {Gromov invariants for holomorphic maps on {Riemann} surfaces},
+    shorttitle = {Gromov invariants},
+    author = {Bertram, Aaron and Wentworth, Richard},
+    volume = {9},
+    number = {2},
+    pages = {529--571},
+    journaltitle = jams,
+    date = {1996},
+    langid = {english},
+    langidopts = {variant=american},
+    annotation = {An \texttt{article} entry with a \texttt{volume} and a \texttt
+                  {number} field},
+}
+
+@article{article:doody_heming=style=jakes_1974,
+    title = {Hemingway's Style and {Jake's} Narration},
+    author = {Doody, Terrence},
+    year = {1974},
+    journal = {The Journal of Narrative Technique},
+    volume = {4},
+    number = {3},
+    pages = {212--225},
+    langid = {english},
+    langidopts = {variant=american},
+    related = {matuz:article:doody_heming=style=jakes_1974},
+    relatedstring = {\autocap{e}xcerpt in},
+    annotation = {An \texttt{article} entry cited as an excerpt from a \texttt{
+                  collection} entry. Note the format of the \texttt{related} and
+                  \texttt{relatedstring} fields},
+}
+
+@article{article:gillies_herder=prepar=goethe_1933,
+    title = {Herder and the Preparation of {Goethe's} Idea of World Literature},
+    author = {Gillies, Alexander},
+    series = {newseries},
+    volume = {9},
+    pages = {46--67},
+    journaltitle = {Publications of the English Goethe Society},
+    date = {1933},
+    langid = {english},
+    langidopts = {variant=british},
+    annotation = {An \texttt{article} entry with a \texttt{series} and a \texttt
+                  {volume} field. Note the format of the \texttt{series} field
+                  in the database file},
+}
+
+@article{article:glashow_partia=symmet=weak_1961,
+    title = {Partial Symmetries of Weak Interactions},
+    author = {Glashow, Sheldon},
+    volume = {22},
+    pages = {579--588},
+    journaltitle = {Nucl.~Phys.},
+    date = {1961},
+}
+
+@article{article:herrmann-ofele_carboc=carben=as_2006,
+    title = {A carbocyclic carbene as an efficient catalyst ligand for {C--C}
+             coupling reactions},
+    author = {Herrmann, Wolfgang A. and {\"O}fele, Karl and Schneider, Sabine K.
+              and Herdtweck, Eberhardt and Hoffmann, Stephan D.},
+    volume = {45},
+    number = {23},
+    pages = {3859--3862},
+    journaltitle = anch-ie,
+    date = {2006},
+    indextitle = {Carbocyclic carbene as an efficient catalyst, A},
+}
+
+@article{article:hostetler-wingate_alkane=gold=cluste_1998,
+    title = {Alkanethiolate gold cluster molecules with core diameters from 1.5
+             to 5.2~{nm}},
+    shorttitle = {Alkanethiolate gold cluster molecules},
+    author = {Hostetler, Michael J. and Wingate, Julia E. and Zhong, Chuan-Jian
+              and Harris, Jay E. and Vachet, Richard W. and Clark, Michael R. and
+              Londono, J. David and Green, Stephen J. and Stokes, Jennifer J. and
+              Wignall, George D. and Glish, Gary L. and Porter, Marc D. and Evans
+              , Neal D. and Murray, Royce W.},
+    volume = {14},
+    number = {1},
+    pages = {17--30},
+    journaltitle = {Langmuir},
+    date = {1998},
+    subtitle = {Core and monolayer properties as a function of core size},
+    langid = {english},
+    langidopts = {variant=american},
+    indextitle = {Alkanethiolate gold cluster molecules},
+    annotation = {An \texttt{article} entry with \arabic{author} authors. By
+                  default, long author and editor lists are automatically
+                  truncated. This is configurable},
+}
+
+@article{article:kastenholz-hunenberger_comput=method=ionic_2006,
+    title = {Computation of methodology\hyphen independent ionic solvation free
+             energies from molecular simulations},
+    author = {Kastenholz, M. A. and H{\"u}nenberger, Philippe H.},
+    volume = {124},
+    doi = {10.1063/1.2172593},
+    journaltitle = jchph,
+    date = {2006},
+    subtitle = {{I}. {The} electrostatic potential in molecular liquids},
+    eid = {124106},
+    langid = {english},
+    langidopts = {variant=american},
+    indextitle = {Computation of ionic solvation free energies},
+    annotation = {An \texttt{article} entry with an \texttt{eid} and a \texttt{
+                  doi} field. Note that the \textsc{doi} is transformed into a
+                  clickable link if \texttt{hyperref} support has been enabled},
+    abstract = {The computation of \texttt{ionic} solvation free energies from atomistic
+                simulations is a surprisingly difficult problem that has found no
+                satisfactory solution for more than 15 years. The reason is that
+                the charging free energies evaluated from such simulations are
+                affected by very large errors. One of these is related to the
+                choice of a specific convention for summing up the contributions
+                of solvent charges to the electrostatic potential in the ionic
+                cavity, namely, on the basis of point charges within entire
+                solvent molecules (M scheme) or on the basis of individual point
+                charges (P scheme). The use of an inappropriate convention may
+                lead to a charge-independent offset in the calculated potential,
+                which depends on the details of the summation scheme, on the
+                quadrupole-moment trace of the solvent molecule, and on the
+                approximate form used to represent electrostatic interactions in
+                the system. However, whether the M or P scheme (if any)
+                represents the appropriate convention is still a matter of
+                on-going debate. The goal of the present article is to settle
+                this long-standing controversy by carefully analyzing (both
+                analytically and numerically) the properties of the electrostatic
+                potential in molecular liquids (and inside cavities within them).
+                },
+}
+
+@article{article:sarfraz-razzak_techni=sectio=algori_2002,
+    title = {Technical section: {An} algorithm for automatic capturing of the
+             font outlines},
+    author = {M. Sarfraz and M. F. A. Razzak},
+    year = {2002},
+    journal = {Computers and Graphics},
+    volume = {26},
+    number = {5},
+    pages = {795--804},
+    issn = {0097-8493},
+    annotation = {An \texttt{article} entry with an \texttt{issn} field},
+}
+
+@article{article:reese_georgi=anglos=diplom_1958,
+    title = {Georgia in {Anglo-Spanish} Diplomacy, 1736--1739},
+    author = {Reese, Trevor R.},
+    series = {3},
+    volume = {15},
+    pages = {168--190},
+    journaltitle = {William and Mary Quarterly},
+    date = {1958},
+    langid = {english},
+    langidopts = {variant=american},
+    annotation = {An \texttt{article} entry with a \texttt{series} and a \texttt
+                  {volume} field. Note the format of the series. If the value of
+                  the \texttt{series} field is an integer, this number is printed
+                  as an ordinal and the string \enquote*{series} is appended
+                  automatically},
+}
+
+@article{article:shore_twiceb=once=concei_1991,
+    title = {Twice-Born, Once Conceived},
+    author = {Shore, Bradd},
+    series = {newseries},
+    volume = {93},
+    number = {1},
+    pages = {9--27},
+    journaltitle = {American Anthropologist},
+    date = {1991-03},
+    subtitle = {Meaning Construction and Cultural Cognition},
+    annotation = {An \texttt{article} entry with \texttt{series}, \texttt{volume
+                  }, and \texttt{number} fields. Note the format of the \texttt{
+                  series} which is a localization key},
+}
+
+@article{article:sigfridsson-ryde_compar=method=derivi_1998,
+    title = {Comparison of methods for deriving atomic charges from the
+             electrostatic potential and moments},
+    author = {Sigfridsson, Emma and Ryde, Ulf},
+    volume = {19},
+    number = {4},
+    pages = {377--395},
+    doi = {10.1002/(SICI)1096-987X(199803)19:4<377::AID-JCC1>3.0.CO;2-P},
+    journaltitle = {Journal of Computational Chemistry},
+    date = {1998},
+    langid = {english},
+    langidopts = {variant=american},
+    indextitle = {Methods for deriving atomic charges},
+    annotation = {An \texttt{article} entry with \texttt{volume}, \texttt{number
+                  }, and \texttt{doi} fields. Note that the \textsc{doi} is
+                  transformed into a clickable link if \texttt{hyperref} support
+                  has been enabled},
+    abstract = {Four methods for deriving partial atomic charges from the
+                quantum chemical electrostatic potential (CHELP, CHELPG,
+                Merz-Kollman, and RESP) have been compared and critically
+                evaluated. It is shown that charges strongly depend on how and
+                where the potential points are selected. Two alternative methods
+                are suggested to avoid the arbitrariness in the point-selection
+                schemes and van der Waals exclusion radii: CHELP-BOW, which also
+                estimates the charges from the electrostatic potential, but with
+                potential points that are Boltzmann-weighted after their
+                occurrence in actual simulations using the energy function of the
+                program in which the charges will be used, and CHELMO, which
+                estimates the charges directly from the electrostatic multipole
+                moments. Different criteria for the quality of the charges are
+                discussed.},
+}
+
+@article{article:spiegelberg_intent=intent=schola_1969,
+    title = {\mkbibquote{Intention} und \mkbibquote{Intentionalit{\"a}t} in der
+             Scholastik, bei Brentano und Husserl},
+    shorttitle = {Intention und Intentionalit{\"a}t},
+    author = {Spiegelberg, Herbert},
+    volume = {29},
+    pages = {189--216},
+    journaltitle = {Studia Philosophica},
+    date = {1969},
+    langid = {german},
+    sorttitle = {Intention und Intentionalitat in der Scholastik, bei Brentano
+                 und Husserl},
+    indexsorttitle = {Intention und Intentionalitat in der Scholastik, bei
+                      Brentano und Husserl},
+    annotation = {An \texttt{article} entry. Note the \texttt{sorttitle} and
+                  \texttt{indexsorttitle} fields and the markup of the quotes in
+                  the database file},
+}
+
+@article{article:springer_mediae=pilgri=routes_1950,
+    title = {Mediaeval Pilgrim Routes from {Scandinavia} to {Rome}},
+    shorttitle = {Mediaeval Pilgrim Routes},
+    author = {Springer, Otto},
+    volume = {12},
+    pages = {92--122},
+    journaltitle = {Mediaeval Studies},
+    date = {1950},
+    langid = {english},
+    langidopts = {variant=british},
+    annotation = {A plain \texttt{article} entry},
+}
+
+@article{article:weinberg_model=lepton_1967,
+    title = {A Model of Leptons},
+    author = {Weinberg, Steven},
+    volume = {19},
+    pages = {1264--1266},
+    journaltitle = {Phys.~Rev.~Lett.},
+    date = {1967},
+}
+
+@string{anch-ie = {Angew.~Chem. Int.~Ed.}}
+
+@string{cup = {Cambridge University Press}}
+
+@string{dtv = {Deutscher Taschenbuch-Verlag}}
+
+@string{hup = {Harvard University Press}}
+
+@string{jams = {J.~Amer. Math. Soc.}}
+
+@string{jchph = {J.~Chem. Phys.}}
+
+@string{jomch = {J.~Organomet. Chem.}}
+
+@string{pup = {Princeton University Press}}
+
+@incollection{incollection:westfahl_true=fronti,
+    title = {The True Frontier},
+    author = {Westfahl, Gary},
+    pages = {55--65},
+    subtitle = {Confronting and Avoiding the Realities of Space in {American}
+                Science Fiction Films},
+    crossref = {westfahl:frontier},
+    langid = {english},
+    langidopts = {variant=american},
+    indextitle = {True Frontier, The},
+    annotation = {A cross-referenced article from a \texttt{collection}. This is
+                  an \texttt{incollection} entry with a \texttt{crossref} field.
+                  Note the \texttt{subtitle} and \texttt{indextitle} fields},
+}
+
+@article{article:yoon-ryu_pallad=pincer=comple_2006,
+    title = {Palladium pincer complexes with reduced bond angle strain:
+             efficient catalysts for the {Heck} reaction},
+    author = {Yoon, Myeong S. and Ryu, Dowook and Kim, Jeongryul and Ahn, Kyo
+              Han},
+    volume = {25},
+    number = {10},
+    pages = {2409--2411},
+    journaltitle = {Organometallics},
+    date = {2006},
+    indextitle = {Palladium pincer complexes},
+}
diff --git a/tests/test-config.toml b/tests/test-config.toml
index d3e42c5..8dd8014 100644
--- a/tests/test-config.toml
+++ b/tests/test-config.toml
@@ -61,10 +61,13 @@ custom_column = "series"
 # year_color = "135"
 
 [citekey_formatter]
-fields = ["entrytype;;;;:", "author;2;;-;_", "title;3;6;_;_", "year", "publisher;;5;#;" ]
+fields = ["shorthand;;;;+","entrytype;;;;:", "author;2;;-;_", "title;3;6;=;_", "year", "publisher;;5;#;" ]
 # fields = [ # CamelCase test
 #   "author;2;;;",
 #   "title;5;5;;",
 #   "year"
 # ]
 case = "lowercase"
+ascii_only = true
+# ignored_words = ["the"]
+# ignored_chars = ["?", "."]
-- 
cgit v1.2.3


From c62b83e02359c24973344699116acc12b4a04108 Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Tue, 14 Oct 2025 08:54:35 +0200
Subject: skip set and xdata entries by default

---
 src/bibiman/citekeys.rs               |  7 +++++--
 src/bibiman/citekeys/citekey_utils.rs |  5 +++++
 src/config.rs                         | 10 ++++++++++
 tests/biblatex-test-citekeys.bib      |  4 ++--
 4 files changed, 22 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index 0cec28e..999c6cb 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -28,7 +28,7 @@ use owo_colors::OwoColorize;
 use serde::{Deserialize, Serialize};
 
 use crate::{
-    bibiman::citekeys::citekey_utils::{build_citekey, formatting_help},
+    bibiman::citekeys::citekey_utils::{SKIPPED_ENTRIES, build_citekey, formatting_help},
     config::{BibiConfig, IGNORED_SPECIAL_CHARS, IGNORED_WORDS},
 };
 
@@ -167,6 +167,10 @@ impl CitekeyFormatting {
     pub fn do_formatting(&mut self, ignored_chars: &[char], ignored_words: &[String]) -> &mut Self {
         let mut old_new_keys: Vec<(String, String)> = Vec::new();
         for entry in self.bib_entries.iter() {
+            // Skip specific entries
+            if SKIPPED_ENTRIES.contains(&entry.entry_type.to_string().to_lowercase().as_str()) {
+                continue;
+            }
             old_new_keys.push((
                 entry.key.clone(),
                 build_citekey(
@@ -181,7 +185,6 @@ impl CitekeyFormatting {
         }
 
         self.old_new_keys_map = old_new_keys;
-        // dbg!(&self.old_new_keys_map);
 
         self
     }
diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs
index 5f70224..58a8274 100644
--- a/src/bibiman/citekeys/citekey_utils.rs
+++ b/src/bibiman/citekeys/citekey_utils.rs
@@ -15,6 +15,8 @@
 // along with this program.  If not, see <https://www.gnu.org/licenses/>.
 /////
 
+use std::sync::LazyLock;
+
 use biblatex::{ChunksExt, Entry, Type};
 use deunicode::deunicode;
 use indoc::formatdoc;
@@ -25,6 +27,8 @@ use owo_colors::{
 
 use crate::bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully};
 
+pub(super) const SKIPPED_ENTRIES: [&str; 2] = ["set", "xdata"];
+
 pub(super) fn formatting_help() {
     let help = vec![
         formatdoc!(
@@ -200,6 +204,7 @@ pub(super) fn build_citekey(
     match case {
         Some(CitekeyCase::Lower) => new_citekey.to_lowercase(),
         Some(CitekeyCase::Upper) => new_citekey.to_uppercase(),
+        // otherwise skip, since camelcase is processed in char loop
         _ => new_citekey,
     }
 }
diff --git a/src/config.rs b/src/config.rs
index 7c1a0f8..b8d8b45 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -52,6 +52,10 @@ pub static IGNORED_WORDS: LazyLock<Vec<String>> = LazyLock::new(|| {
         String::from("at"),
         String::from("to"),
         String::from("and"),
+        String::from("him"),
+        String::from("her"),
+        String::from("his"),
+        String::from("hers"),
         String::from("der"),
         String::from("die"),
         String::from("das"),
@@ -63,6 +67,12 @@ pub static IGNORED_WORDS: LazyLock<Vec<String>> = LazyLock::new(|| {
         String::from("und"),
         String::from("für"),
         String::from("vor"),
+        String::from("er"),
+        String::from("sie"),
+        String::from("es"),
+        String::from("ihm"),
+        String::from("ihr"),
+        String::from("ihnen"),
     ]
 });
 
diff --git a/tests/biblatex-test-citekeys.bib b/tests/biblatex-test-citekeys.bib
index 9767f97..34c2f33 100644
--- a/tests/biblatex-test-citekeys.bib
+++ b/tests/biblatex-test-citekeys.bib
@@ -1,9 +1,9 @@
-@set{set,
+@set{SET,
     entryset = {article:herrmann-ofele_carboc=carben=as_2006,article:aksin-turkmen_effect=immobi=on_2006,article:yoon-ryu_pallad=pincer=comple_2006},
     annotation = {A \texttt{set} with three members.},
 }
 
-@set{set,
+@set{stdmodel,
     entryset = {article:glashow_partia=symmet=weak_1961,article:weinberg_model=lepton_1967,salam},
     annotation = {A \texttt{set} with three members discussing the standard
                   model of particle physics.},
-- 
cgit v1.2.3


From 9b21727bd151a3fda2133b9da12eec588068130e Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Tue, 14 Oct 2025 14:30:56 +0200
Subject: use citekey formatter for adding new entries via doi

---
 src/bibiman.rs          | 105 +++++++++++++-----------------------------
 src/bibiman/citekeys.rs | 118 ++++++++++++++++++++++++++----------------------
 2 files changed, 97 insertions(+), 126 deletions(-)

(limited to 'src')

diff --git a/src/bibiman.rs b/src/bibiman.rs
index 3158d73..392ae95 100644
--- a/src/bibiman.rs
+++ b/src/bibiman.rs
@@ -16,22 +16,23 @@
 /////
 
 use crate::app::expand_home;
+use crate::bibiman::citekeys::CitekeyFormatting;
 use crate::bibiman::entries::EntryTableColumn;
 use crate::bibiman::{bibisetup::*, search::BibiSearch};
 use crate::cliargs::CLIArgs;
 use crate::config::BibiConfig;
-use crate::tui::popup::{PopupArea, PopupItem, PopupKind};
 use crate::tui::Tui;
+use crate::tui::popup::{PopupArea, PopupItem, PopupKind};
 use crate::{app, cliargs};
 use crate::{bibiman::entries::EntryTable, bibiman::keywords::TagList};
 use arboard::Clipboard;
-use color_eyre::eyre::{Context, Error, Result};
+use biblatex::Bibliography;
+use color_eyre::eyre::{Context, Error, Result, eyre};
 use crossterm::event::KeyCode;
 use editor_command::EditorBuilder;
 use ratatui::widgets::ScrollbarState;
-use regex::Regex;
 use std::ffi::OsStr;
-use std::fs::{self, read_to_string};
+use std::fs::{self};
 use std::fs::{File, OpenOptions};
 use std::io::Write;
 use std::path::PathBuf;
@@ -190,7 +191,9 @@ impl Bibiman {
                     self.popup_area.popup_message = message.unwrap().to_owned();
                     Ok(())
                 } else {
-                    Err(Error::msg("You need to past at least a message via Some(&str) to create a message popup"))
+                    Err(Error::msg(
+                        "You need to past at least a message via Some(&str) to create a message popup",
+                    ))
                 }
             }
             PopupKind::MessageError => {
@@ -202,7 +205,9 @@ impl Bibiman {
                     self.popup_area.popup_message = message.unwrap().to_owned();
                     Ok(())
                 } else {
-                    Err(Error::msg("You need to past at least a message via Some(&str) to create a message popup"))
+                    Err(Error::msg(
+                        "You need to past at least a message via Some(&str) to create a message popup",
+                    ))
                 }
             }
             PopupKind::OpenRes => {
@@ -680,23 +685,32 @@ impl Bibiman {
         // Index of selected popup field
         let popup_idx = self.popup_area.popup_state.selected().unwrap();
 
-        // regex pattern to match citekey in fetched bibtexstring
-        let pattern = Regex::new(r"\{([^\{\},]*),").unwrap();
+        let new_bib_entry = Bibliography::parse(&self.popup_area.popup_sel_item)
+            .map_err(|e| eyre!("Couldn't parse downloaded bib entry: {}", e.to_string()))?;
 
-        let citekey = pattern
-            .captures(&self.popup_area.popup_sel_item)
-            .unwrap()
-            .get(1)
-            .unwrap()
-            .as_str()
-            .to_string();
+        let formatted_struct =
+            if let Some(formatter) = CitekeyFormatting::new(cfg, new_bib_entry.clone()) {
+                Some(formatter.do_formatting())
+            } else {
+                None
+            };
+
+        let (new_citekey, entry_string) = if let Some(mut formatter) = formatted_struct {
+            (
+                formatter.get_citekey_pair(0).unwrap().1,
+                formatter.print_updated_bib_as_string(),
+            )
+        } else {
+            let keys = new_bib_entry.keys().collect::<Vec<&str>>();
+            (keys[0].to_string(), new_bib_entry.to_biblatex_string())
+        };
 
         // Check if new file or existing file was choosen
         let mut file = if self.popup_area.popup_list[popup_idx]
             .0
             .contains("Create new file")
         {
-            let citekey = PathBuf::from(&citekey);
+            let citekey = PathBuf::from(&new_citekey);
             // Get path of current files
             let path: PathBuf = if self.main_bibfiles[0].is_file() {
                 self.main_bibfiles[0].parent().unwrap().to_owned()
@@ -714,45 +728,18 @@ impl Bibiman {
         } else {
             let file_path = &self.main_bibfiles[popup_idx - 1];
 
-            // Check if similar citekey already exists
-            let file_string = read_to_string(&file_path).unwrap();
-
-            // If choosen file contains entry with fetched citekey, append an
-            // char to the citekey so no dublettes are created
-            if file_string.contains(&citekey) {
-                let mut new_citekey = String::new();
-
-                // Loop over ASCII alpabetic chars and check again if citekey with
-                // appended char exists. If yes, move to next char and test again.
-                // If the citekey is free, use it and break the loop
-                for c in b'a'..=b'z' {
-                    let append_char = (c as char).to_string();
-                    new_citekey = citekey.clone() + &append_char;
-                    if !file_string.contains(&new_citekey) {
-                        break;
-                    }
-                }
-
-                let new_entry_string_clone = self.popup_area.popup_sel_item.clone();
-
-                // Replace the double citekey with newly created
-                self.popup_area.popup_sel_item = pattern
-                    .replace(&new_entry_string_clone, format!("{{{},", &new_citekey))
-                    .to_string();
-            }
-
             OpenOptions::new().append(true).open(file_path).unwrap()
         };
         // Optionally, add a newline before the content
         file.write_all(b"\n")?;
         // Write content to file
-        file.write_all(self.popup_area.popup_sel_item.as_bytes())?;
+        file.write_all(entry_string.as_bytes())?;
         // Update the database and the lists to reflect the new content
         self.update_lists(cfg);
         self.close_popup();
 
         // Select newly created entry
-        self.select_entry_by_citekey(&citekey);
+        self.select_entry_by_citekey(&new_citekey);
 
         Ok(())
     }
@@ -1285,38 +1272,10 @@ impl Bibiman {
 
 #[cfg(test)]
 mod tests {
-    use regex::Captures;
-
-    use super::*;
-
     #[test]
     fn citekey_pattern() {
         let citekey = format!("{{{},", "a_key_2001");
 
         assert_eq!(citekey, "{a_key_2001,")
     }
-
-    #[test]
-    fn regex_capture_citekey() {
-        let re = Regex::new(r"\{([^\{\},]*),").unwrap();
-
-        let bibstring = String::from("@article{citekey77_2001:!?, author = {Hanks, Tom}, title = {A great book}, year = {2001}}");
-
-        let citekey = re.captures(&bibstring).unwrap().get(1).unwrap().as_str();
-
-        assert_eq!(citekey, "citekey77_2001:!?");
-
-        if bibstring.contains(&citekey) {
-            let append_char = "a";
-            let new_entry_string_clone = bibstring.clone();
-
-            let updated_bibstring = re
-                .replace(&new_entry_string_clone, |caps: &Captures| {
-                    format!("{{{}{},", &caps[1], &append_char)
-                })
-                .to_string();
-
-            assert_eq!(updated_bibstring, "@article{citekey77_2001:!?a, author = {Hanks, Tom}, title = {A great book}, year = {2001}}")
-        }
-    }
 }
diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index 999c6cb..4516b28 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -51,24 +51,27 @@ pub enum CitekeyCase {
 }
 
 #[derive(Debug, Default, Clone)]
-pub(crate) struct CitekeyFormatting {
+pub(crate) struct CitekeyFormatting<'a> {
     /// bibfile to replace keys at. The optional fields defines a differing
     /// output file to write to, otherwise original file will be overwritten.
-    bibfile_path: (PathBuf, Option<PathBuf>),
     bib_entries: Bibliography,
     fields: Vec<String>,
     case: Option<CitekeyCase>,
     old_new_keys_map: Vec<(String, String)>,
     dry_run: bool,
     ascii_only: bool,
+    ignored_chars: &'a [char],
+    ignored_words: &'a [String],
 }
 
-impl CitekeyFormatting {
+impl<'a> CitekeyFormatting<'a> {
     pub(crate) fn parse_citekey_cli(
         parser: &mut lexopt::Parser,
         cfg: &BibiConfig,
     ) -> color_eyre::Result<()> {
         let mut formatter = CitekeyFormatting::default();
+        let mut source_file = PathBuf::new();
+        let mut target_file: Option<PathBuf> = None;
 
         formatter.fields = cfg.citekey_formatter.fields.clone().ok_or_eyre(format!(
             "Need to define {} correctly in config file",
@@ -93,78 +96,73 @@ impl CitekeyFormatting {
                 }
                 Short('d') | Long("dry-run") => formatter.dry_run = true,
                 Short('s') | Short('f') | Long("source") | Long("file") => {
-                    formatter.bibfile_path.0 = parser.value()?.into()
+                    source_file = parser.value()?.into()
                 }
                 Short('t') | Short('o') | Long("target") | Long("output") => {
-                    formatter.bibfile_path.1 = Some(parser.value()?.into())
+                    target_file = Some(parser.value()?.into())
                 }
                 _ => return Err(arg.unexpected().into()),
             }
         }
 
-        let bibstring = std::fs::read_to_string(&formatter.bibfile_path.0)?;
+        let bibstring = std::fs::read_to_string(&source_file)?;
 
         formatter.bib_entries = Bibliography::parse(&bibstring)
             .map_err(|e| eyre!("Couldn't parse bibfile due to {}", e.kind))?;
 
-        let ignored_chars = if let Some(chars) = &cfg.citekey_formatter.ignored_chars {
+        formatter.ignored_chars = if let Some(chars) = &cfg.citekey_formatter.ignored_chars {
             chars.as_slice()
         } else {
             IGNORED_SPECIAL_CHARS.as_slice()
         };
 
-        let ignored_words = if let Some(words) = &cfg.citekey_formatter.ignored_words {
+        formatter.ignored_words = if let Some(words) = &cfg.citekey_formatter.ignored_words {
             words.as_slice()
         } else {
             &*IGNORED_WORDS.as_slice()
         };
 
         formatter
-            .do_formatting(ignored_chars, ignored_words)
+            .do_formatting()
             .rev_sort_new_keys_by_len()
-            .update_file()?;
+            .update_file(source_file, target_file)?;
 
         Ok(())
     }
 
     /// Start Citekey formatting with building a new instance of `CitekeyFormatting`
-    /// Formatting is processed file by file, because `bibman` can handle
-    /// multi-file setups.
-    /// The `Bibliography` inserted will be edited in place with the new citekeys.
-    /// Thus, in the end the `bib_entries` field will hold the updated `Bibliography`
-    pub fn new<P: AsRef<Path>>(
-        cfg: &BibiConfig,
-        path: P,
-        target: Option<P>,
-        bib_entries: Bibliography,
-    ) -> color_eyre::Result<Self> {
-        let fields = cfg
-            .citekey_formatter
-            .fields
-            .clone()
-            .expect("Need to define fields in config to format citekeys");
+    pub fn new(cfg: &'a BibiConfig, bib_entries: Bibliography) -> Option<Self> {
+        let fields = cfg.citekey_formatter.fields.clone().unwrap_or(Vec::new());
         if fields.is_empty() {
-            return Err(eyre!(
-                "To format all citekeys, you need to provide {} values in the config file",
-                "fields".bold()
-            ));
+            return None;
         }
-        Ok(Self {
-            bibfile_path: (
-                path.as_ref().to_path_buf(),
-                target.map(|p| p.as_ref().to_path_buf()),
-            ),
+        let ignored_chars = if let Some(chars) = &cfg.citekey_formatter.ignored_chars {
+            chars.as_slice()
+        } else {
+            IGNORED_SPECIAL_CHARS.as_slice()
+        };
+
+        let ignored_words = if let Some(words) = &cfg.citekey_formatter.ignored_words {
+            words.as_slice()
+        } else {
+            &*IGNORED_WORDS.as_slice()
+        };
+
+        Some(Self {
             bib_entries,
             fields,
             case: cfg.citekey_formatter.case.clone(),
             old_new_keys_map: Vec::new(),
             dry_run: false,
             ascii_only: cfg.citekey_formatter.ascii_only,
+            ignored_chars,
+            ignored_words,
         })
     }
 
-    /// Process the actual formatting. The citekey of every entry will be updated.
-    pub fn do_formatting(&mut self, ignored_chars: &[char], ignored_words: &[String]) -> &mut Self {
+    /// Process the actual formatting. Updated citekeys will be stored in the
+    /// `self.old_new_keys_map` vector consisting of pairs (old key, new key).
+    pub fn do_formatting(mut self) -> Self {
         let mut old_new_keys: Vec<(String, String)> = Vec::new();
         for entry in self.bib_entries.iter() {
             // Skip specific entries
@@ -178,8 +176,8 @@ impl CitekeyFormatting {
                     &self.fields,
                     self.case.as_ref(),
                     self.ascii_only,
-                    ignored_chars,
-                    ignored_words,
+                    self.ignored_chars,
+                    self.ignored_words,
                 ),
             ));
         }
@@ -189,8 +187,12 @@ impl CitekeyFormatting {
         self
     }
 
-    /// Write entries with updated citekeys to bibfile
-    pub fn update_file(&mut self) -> color_eyre::Result<()> {
+    /// Write formatted citekeys to bibfile replacing the old keys in all fields
+    pub fn update_file<P: AsRef<Path>>(
+        &mut self,
+        source_file: P,
+        target_file: Option<P>,
+    ) -> color_eyre::Result<()> {
         if self.dry_run {
             println!("Following citekeys would be formatted: old => new\n");
             self.old_new_keys_map.sort_by(|a, b| a.0.cmp(&b.0));
@@ -198,11 +200,10 @@ impl CitekeyFormatting {
                 println!("{} => {}", old.italic(), new.bold())
             }
         } else {
-            let source_file = self.bibfile_path.0.as_path();
-            let target_file = if let Some(path) = &self.bibfile_path.1 {
-                path
+            let target_file = if let Some(path) = target_file {
+                path.as_ref().to_path_buf()
             } else {
-                source_file
+                source_file.as_ref().to_path_buf()
             };
             let mut content = std::fs::read_to_string(source_file)?;
 
@@ -228,23 +229,34 @@ impl CitekeyFormatting {
     /// You are **very encouraged** to call this method before `update_file()` to
     /// prevent replacing citekeys partly which afterwards wont match the pattern
     /// anymore.
-    pub fn rev_sort_new_keys_by_len(&mut self) -> &mut Self {
+    pub fn rev_sort_new_keys_by_len(mut self) -> Self {
         self.old_new_keys_map
             .sort_by(|a, b| b.0.len().cmp(&a.0.len()));
         self
     }
+
+    /// Serialize the `Bibliography` of the `CitekeyFormatting` struct to a
+    /// `String` in which every old citekey is replaced by its new citekey.
+    pub fn print_updated_bib_as_string(&mut self) -> String {
+        let mut content = self.bib_entries.to_biblatex_string();
+        for (old_key, new_key) in self.old_new_keys_map.iter() {
+            content = content.replace(old_key, new_key);
+        }
+        content
+    }
+
+    pub fn get_citekey_pair(&self, idx: usize) -> Option<(String, String)> {
+        self.old_new_keys_map.get(idx).map(|pair| pair.to_owned())
+    }
 }
 
 #[cfg(test)]
 mod tests {
-    use std::path::PathBuf;
-
-    use biblatex::Bibliography;
-
     use crate::{
         bibiman::citekeys::{CitekeyCase, CitekeyFormatting},
         config::{IGNORED_SPECIAL_CHARS, IGNORED_WORDS},
     };
+    use biblatex::Bibliography;
 
     #[test]
     fn format_citekey_test() {
@@ -270,8 +282,7 @@ mod tests {
         }
         ";
         let bibliography = Bibliography::parse(src).unwrap();
-        let mut formatting_struct = CitekeyFormatting {
-            bibfile_path: (PathBuf::new(), None),
+        let formatting_struct = CitekeyFormatting {
             bib_entries: bibliography,
             fields: vec![
                 "entrytype;;;;:".into(),
@@ -284,9 +295,10 @@ mod tests {
             old_new_keys_map: Vec::new(),
             dry_run: false,
             ascii_only: true,
+            ignored_chars: &IGNORED_SPECIAL_CHARS,
+            ignored_words: &IGNORED_WORDS,
         };
-        let _ = formatting_struct
-            .do_formatting(IGNORED_SPECIAL_CHARS.as_slice(), &*IGNORED_WORDS.as_slice());
+        let formatting_struct = formatting_struct.do_formatting();
         assert_eq!(
             formatting_struct.old_new_keys_map.get(0).unwrap().1,
             "article:bos-mccurley_lat=met=pub=wor_2023"
-- 
cgit v1.2.3


From 3cd41cb1bc2046f1710175999305ab08508bae69 Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Tue, 14 Oct 2025 16:28:20 +0200
Subject: option to reformat citekey-based basenames of attachment files

---
 CITEKEYS.md                           |   8 +-
 src/bibiman/bibisetup.rs              |  98 +++----
 src/bibiman/citekeys.rs               |  85 +++++-
 src/bibiman/citekeys/citekey_utils.rs |  17 +-
 tests/biblatex-test-citekeys.bib      | 476 ----------------------------------
 5 files changed, 152 insertions(+), 532 deletions(-)
 delete mode 100644 tests/biblatex-test-citekeys.bib

(limited to 'src')

diff --git a/CITEKEYS.md b/CITEKEYS.md
index 19bd497..828e557 100644
--- a/CITEKEYS.md
+++ b/CITEKEYS.md
@@ -147,11 +147,17 @@ should be built.
 - Most importantly: *always use the **`--dry-run`** option first*! This will
   print a list of old and new values for all citekeys in the file without
   changing anything. For the test file of this repo and using the pattern from
-  the [section below](#examples) `----dry-run` produces the following output:
+  the [section below](#examples) `--dry-run` produces the following output:
   [![niri-screenshot-2025-10-14-10-11-06.png](https://i.postimg.cc/SxxRkY8K/niri-screenshot-2025-10-14-10-11-06.png)](https://postimg.cc/bs4pRJmX)
 - After finding a good overall pattern, *use the `--output=` option* to create a
   new file and don't overwrite your existing file. Thus, your original file
   isn't broken if the key formatter produces some unwanted output.
+- It's possible to update citekey-based PDF and note files directly when
+  formatting the citekeys using the `-u`/`--update-attachments` option. Thus,
+  all PDFs and notes are already linked to the correct entries after updating
+  the citekeys. Since this operation can break things, use it with `--dry-run`
+  first. As with regular citekeys this will print all changes without processing
+  anything.
 - Even very long patterns are possible, they are not encouraged, since it bloats
   the bibfiles.
 - The same accounts for *too short* patterns; if the pattern is to unspecific,
diff --git a/src/bibiman/bibisetup.rs b/src/bibiman/bibisetup.rs
index a83a507..a817236 100644
--- a/src/bibiman/bibisetup.rs
+++ b/src/bibiman/bibisetup.rs
@@ -22,6 +22,7 @@ use itertools::Itertools;
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use std::ffi::{OsStr, OsString};
+use std::path::Path;
 use std::{fs, path::PathBuf};
 use walkdir::WalkDir;
 
@@ -318,23 +319,27 @@ impl BibiSetup {
         cfg: &BibiConfig,
     ) -> Vec<BibiData> {
         let mut pdf_files = if cfg.general.pdf_path.is_some() {
-            collect_file_paths(cfg.general.pdf_path.as_ref().unwrap(), &Some(vec!["pdf"]))
+            collect_file_paths(
+                cfg.general.pdf_path.as_ref().unwrap(),
+                Some(vec!["pdf".into()].as_slice()),
+            )
+        } else {
+            None
+        };
+        let ext = if let Some(ext) = &cfg.general.note_extensions
+            && cfg.general.note_path.is_some()
+        {
+            // let mut ext: Vec<&str> = Vec::new();
+            // for e in cfg.general.note_extensions.as_ref().unwrap().iter() {
+            //     ext.push(e);
+            // }
+            Some(ext.as_slice())
         } else {
             None
         };
-        let ext: Option<Vec<&str>> =
-            if cfg.general.note_path.is_some() && cfg.general.note_extensions.is_some() {
-                let mut ext: Vec<&str> = Vec::new();
-                for e in cfg.general.note_extensions.as_ref().unwrap().iter() {
-                    ext.push(e);
-                }
-                Some(ext)
-            } else {
-                None
-            };
         let mut note_files =
             if cfg.general.note_path.is_some() && cfg.general.note_extensions.is_some() {
-                collect_file_paths(cfg.general.note_path.as_ref().unwrap(), &ext)
+                collect_file_paths(cfg.general.note_path.as_ref().unwrap(), ext.clone())
             } else {
                 None
             };
@@ -369,7 +374,7 @@ impl BibiSetup {
                     file_field: filepaths.1,
                     subtitle: Self::get_subtitle(k, bibliography),
                     notes: if note_files.is_some() {
-                        Self::get_notepath(k, &mut note_files, &ext)
+                        Self::get_notepath(k, &mut note_files, ext)
                     } else {
                         None
                     },
@@ -575,18 +580,18 @@ impl BibiSetup {
     ) -> (Option<Vec<OsString>>, bool) {
         if biblio.get(citekey).unwrap().file().is_ok() {
             (
-                Some(vec![biblio
-                    .get(citekey)
-                    .unwrap()
-                    .file()
-                    .unwrap()
-                    .trim()
-                    .into()]),
+                Some(vec![
+                    biblio.get(citekey).unwrap().file().unwrap().trim().into(),
+                ]),
                 true,
             )
         } else if pdf_files.is_some() {
             (
-                Self::merge_filepath_or_none_two(&citekey, pdf_files, vec!["pdf"]),
+                Self::merge_filepath_or_none_two(
+                    &citekey,
+                    pdf_files,
+                    vec!["pdf".into()].as_slice(),
+                ),
                 false,
             )
         } else {
@@ -597,10 +602,10 @@ impl BibiSetup {
     pub fn get_notepath(
         citekey: &str,
         note_files: &mut Option<HashMap<String, Vec<PathBuf>>>,
-        ext: &Option<Vec<&str>>,
+        ext: Option<&[String]>,
     ) -> Option<Vec<OsString>> {
         if let Some(e) = ext {
-            Self::merge_filepath_or_none_two(citekey, note_files, e.to_vec())
+            Self::merge_filepath_or_none_two(citekey, note_files, e)
         } else {
             None
         }
@@ -627,7 +632,7 @@ impl BibiSetup {
     fn merge_filepath_or_none_two(
         citekey: &str,
         files: &mut Option<HashMap<String, Vec<PathBuf>>>,
-        extensions: Vec<&str>,
+        extensions: &[String],
     ) -> Option<Vec<OsString>> {
         let mut file = Vec::new();
 
@@ -645,11 +650,7 @@ impl BibiSetup {
             }
         }
 
-        if file.is_empty() {
-            None
-        } else {
-            Some(file)
-        }
+        if file.is_empty() { None } else { Some(file) }
     }
 }
 
@@ -663,15 +664,17 @@ impl BibiSetup {
 ///
 /// Passing [`None`] as argument for extensions will result in collecting all files
 /// from the given directory and its subdirectories!
-pub fn collect_file_paths(
-    file_dir: &PathBuf,
-    extensions: &Option<Vec<&str>>,
+pub fn collect_file_paths<P: AsRef<Path>>(
+    file_dir: P,
+    extensions: Option<&[String]>,
 ) -> Option<HashMap<String, Vec<PathBuf>>> {
     let mut files: HashMap<String, Vec<PathBuf>> = HashMap::new();
 
+    let file_dir = file_dir.as_ref();
+
     // Expand tilde to /home/user
     let file_dir = if file_dir.starts_with("~") {
-        &app::expand_home(&file_dir)
+        &app::expand_home(&file_dir.to_path_buf())
     } else {
         file_dir
     };
@@ -682,13 +685,13 @@ pub fn collect_file_paths(
             let f = file.unwrap().into_path();
             if f.is_file()
                 && f.extension().is_some()
-                && extensions.as_ref().is_some_and(|v| {
+                && extensions.is_some_and(|v| {
                     v.contains(
                         &f.extension()
                             .unwrap_or_default()
                             .to_ascii_lowercase()
-                            .to_str()
-                            .unwrap_or_default(),
+                            .to_string_lossy()
+                            .to_string(),
                     )
                 })
             {
@@ -721,11 +724,7 @@ pub fn collect_file_paths(
         }
     }
 
-    if files.is_empty() {
-        None
-    } else {
-        Some(files)
-    }
+    if files.is_empty() { None } else { Some(files) }
 }
 
 #[cfg(test)]
@@ -759,8 +758,11 @@ mod tests {
             ],
         );
 
-        let matches =
-            BibiSetup::merge_filepath_or_none_two("citekey", &mut Some(files), vec!["md", "pdf"]);
+        let matches = BibiSetup::merge_filepath_or_none_two(
+            "citekey",
+            &mut Some(files),
+            vec!["md".into(), "pdf".into()].as_slice(),
+        );
 
         assert_eq!(
             matches.clone().unwrap().iter().next().unwrap().to_owned(),
@@ -770,9 +772,11 @@ mod tests {
             matches.clone().unwrap().last().unwrap().to_owned(),
             OsString::from("/one/other/citekey.pdf")
         );
-        assert!(!matches
-            .clone()
-            .unwrap()
-            .contains(&OsString::from("/one/other/citekey2.pdf")));
+        assert!(
+            !matches
+                .clone()
+                .unwrap()
+                .contains(&OsString::from("/one/other/citekey2.pdf"))
+        );
     }
 }
diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index 4516b28..8f70ab0 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -16,6 +16,7 @@
 /////
 
 use std::{
+    ffi::OsStr,
     fs::OpenOptions,
     io::Write,
     path::{Path, PathBuf},
@@ -28,7 +29,10 @@ use owo_colors::OwoColorize;
 use serde::{Deserialize, Serialize};
 
 use crate::{
-    bibiman::citekeys::citekey_utils::{SKIPPED_ENTRIES, build_citekey, formatting_help},
+    bibiman::{
+        bibisetup::collect_file_paths,
+        citekeys::citekey_utils::{SKIPPED_ENTRIES, build_citekey, formatting_help},
+    },
     config::{BibiConfig, IGNORED_SPECIAL_CHARS, IGNORED_WORDS},
 };
 
@@ -72,6 +76,7 @@ impl<'a> CitekeyFormatting<'a> {
         let mut formatter = CitekeyFormatting::default();
         let mut source_file = PathBuf::new();
         let mut target_file: Option<PathBuf> = None;
+        let mut update_files = false;
 
         formatter.fields = cfg.citekey_formatter.fields.clone().ok_or_eyre(format!(
             "Need to define {} correctly in config file",
@@ -101,6 +106,7 @@ impl<'a> CitekeyFormatting<'a> {
                 Short('t') | Short('o') | Long("target") | Long("output") => {
                     target_file = Some(parser.value()?.into())
                 }
+                Short('u') | Long("update-attachments") => update_files = true,
                 _ => return Err(arg.unexpected().into()),
             }
         }
@@ -122,10 +128,13 @@ impl<'a> CitekeyFormatting<'a> {
             &*IGNORED_WORDS.as_slice()
         };
 
-        formatter
-            .do_formatting()
-            .rev_sort_new_keys_by_len()
-            .update_file(source_file, target_file)?;
+        let mut updated_formatter = formatter.do_formatting().rev_sort_new_keys_by_len();
+
+        updated_formatter.update_file(source_file, target_file)?;
+
+        if update_files {
+            updated_formatter.update_notes_pdfs(cfg)?;
+        }
 
         Ok(())
     }
@@ -194,7 +203,13 @@ impl<'a> CitekeyFormatting<'a> {
         target_file: Option<P>,
     ) -> color_eyre::Result<()> {
         if self.dry_run {
-            println!("Following citekeys would be formatted: old => new\n");
+            println!(
+                "{}\n",
+                "Following citekeys would be formatted: old => new"
+                    .bold()
+                    .underline()
+                    .white()
+            );
             self.old_new_keys_map.sort_by(|a, b| a.0.cmp(&b.0));
             for (old, new) in &self.old_new_keys_map {
                 println!("{} => {}", old.italic(), new.bold())
@@ -235,6 +250,64 @@ impl<'a> CitekeyFormatting<'a> {
         self
     }
 
+    /// Renames (or, on a dry run, lists) files named after an old citekey: PDFs in
+    /// `general.pdf_path`, notes with `general.note_extensions` in `general.note_path`.
+    pub fn update_notes_pdfs(&self, cfg: &BibiConfig) -> color_eyre::Result<()> {
+        if let Some(pdf_dir) = &cfg.general.pdf_path {
+            self.update_files_by_citekey_basename(pdf_dir, &["pdf".to_owned()])?;
+        }
+        if let (Some(dir), Some(exts)) = (&cfg.general.note_path, &cfg.general.note_extensions) {
+            self.update_files_by_citekey_basename(dir, exts.as_slice())?;
+        }
+        Ok(())
+    }
+
+    /// Renames files that `collect_file_paths` returns for `path` from `<old key>.<ext>` to
+    /// `<new key>.<ext>` (printed only on a dry run); errors instead of replacing other files.
+    fn update_files_by_citekey_basename<P: AsRef<Path>>(
+        &self,
+        path: P,
+        ext: &[String],
+    ) -> color_eyre::Result<()> {
+        let files = collect_file_paths(path.as_ref(), Some(ext));
+        if self.dry_run {
+            println!(
+                "\n{}\n",
+                "Following paths would be updated:"
+                    .underline()
+                    .bold()
+                    .white()
+            )
+        }
+        let Some(mut files) = files else {
+            return Ok(());
+        };
+        for (old_key, new_key) in self.old_new_keys_map.iter() {
+            for e in ext {
+                let Some(paths) = files.get_mut(&format!("{old_key}.{e}")) else {
+                    continue;
+                };
+                for p in paths {
+                    let suffix = p.extension().and_then(OsStr::to_str).unwrap_or("");
+                    let new_name = p.with_file_name(format!("{new_key}.{suffix}"));
+                    if self.dry_run {
+                        println!(
+                            "{} => {}",
+                            p.display().to_string().italic().dimmed(),
+                            new_name.display().to_string().bold()
+                        )
+                    } else if new_name != *p && new_name.exists() {
+                        // `fs::rename` would replace an existing target: refuse to clobber it.
+                        color_eyre::eyre::bail!("refusing to overwrite {}", new_name.display());
+                    } else {
+                        std::fs::rename(&*p, &new_name)?;
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+
     /// Update the `Bibliography` of the `CitekeyFormatting` struct and return
     /// it as `String`.
     pub fn print_updated_bib_as_string(&mut self) -> String {
diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs
index 58a8274..61a1804 100644
--- a/src/bibiman/citekeys/citekey_utils.rs
+++ b/src/bibiman/citekeys/citekey_utils.rs
@@ -79,7 +79,7 @@ pub(super) fn formatting_help() {
                 \t{}
                 \tThe bibfile for which the citekey formatting should be processed.
                 \tTakes a path as argument.
-            ", "-s, -f, --source=, --file=".fg::<White>().bold()},
+            ", "-s, -f, --source=<PATH>, --file=<PATH>".fg::<White>().bold()},
         formatdoc!(
             "
                 \t{}
@@ -88,9 +88,22 @@ pub(super) fn formatting_help() {
                 \tcreated.
                 \tIf the argument isn't used, the original file will be {}!
             ",
-            "-t, -o, --target=, --output=".fg::<White>().bold(),
+            "-t, -o, --target=<PATH>, --output=<PATH>"
+                .fg::<White>()
+                .bold(),
             "overwritten".italic(),
         ),
+        formatdoc!(
+            "
+                \t{}
+                \tWhen formatting citekeys also rename all PDFs and notefiles
+                \tfollowing the bibiman citekey-basename scheme at the locations
+                \tset in the config file. This option can break file paths. Try
+                \twith {} first!
+            ",
+            "-u, --update-attachments".fg::<White>().bold(),
+            "--dry-run".bold()
+        ),
     ];
     let help = help.join("\n");
     println!("{}", help);
diff --git a/tests/biblatex-test-citekeys.bib b/tests/biblatex-test-citekeys.bib
deleted file mode 100644
index 34c2f33..0000000
--- a/tests/biblatex-test-citekeys.bib
+++ /dev/null
@@ -1,476 +0,0 @@
-@set{SET,
-    entryset = {article:herrmann-ofele_carboc=carben=as_2006,article:aksin-turkmen_effect=immobi=on_2006,article:yoon-ryu_pallad=pincer=comple_2006},
-    annotation = {A \texttt{set} with three members.},
-}
-
-@set{stdmodel,
-    entryset = {article:glashow_partia=symmet=weak_1961,article:weinberg_model=lepton_1967,salam},
-    annotation = {A \texttt{set} with three members discussing the standard
-                  model of particle physics.},
-}
-
-@collection{collection:matuz-miller_contem=litera=critic_1990gale,
-    title = {Contemporary Literary Criticism},
-    year = {1990},
-    location = {Detroit},
-    publisher = {Gale},
-    volume = {61},
-    pages = {204--208},
-    editor = {Matuz, Roger and Miller, Helen},
-    keywords = {narration},
-    langid = {english},
-    langidopts = {variant=american},
-    annotation = {A \texttt{collection} entry providing the excerpt information
-                  for the \texttt{article:doody_heming=style=jakes_1974} entry. Note the format of the \texttt{
-                  pages} field},
-}
-
-@article{article:aksin-turkmen_effect=immobi=on_2006,
-    title = {Effect of immobilization on catalytic characteristics of saturated
-             {Pd-N}-heterocyclic carbenes in {Mizoroki-Heck} reactions},
-    author = {Aks{\i}n, {\"O}zge and T{\"u}rkmen, Hayati and Artok , Levent and
-              { \c{C}}etinkaya, Bekir and Ni, Chaoying and B{\" u}y{ \"u}kg{\"u}
-              ng{ \" o}r, Orhan and {\"O}zkal, Erhan},
-    volume = {691},
-    number = {13},
-    pages = {3027--3036},
-    journaltitle = jomch,
-    date = {2006},
-    indextitle = {Effect of immobilization on catalytic characteristics},
-}
-
-@article{article:angenendt_honore=salvat=vom_2002,
-    title = {In Honore Salvatoris~-- Vom Sinn und Unsinn der Patrozinienkunde},
-    shorttitle = {In Honore Salvatoris},
-    author = {Angenendt, Arnold},
-    volume = {97},
-    pages = {431--456, 791--823},
-    journaltitle = {Revue d'Histoire Eccl{\'e}siastique},
-    date = {2002},
-    langid = {german},
-    indextitle = {In Honore Salvatoris},
-    annotation = {A German article in a French journal. Apart from that, a
-                  typical \texttt{article} entry. Note the \texttt{indextitle}
-                  field},
-}
-
-@book{book:aristotle_de=anima_1907cambr#unive#press,
-    title = {De Anima},
-    author = {Aristotle},
-    location = {Cambridge},
-    publisher = cup,
-    date = {1907},
-    editor = {Hicks, Robert Drew},
-    keywords = {primary, ancient, philosophy, athens},
-    langid = {english},
-    langidopts = {variant=british},
-    annotation = {A \texttt{book} entry with an \texttt{author} and an \texttt{
-                  editor}},
-}
-
-@book{book:aristotle_physic_1929g#p#putna,
-    title = {Physics},
-    shorttitle = {Physics},
-    author = {Aristotle},
-    location = {New York},
-    publisher = {G. P. Putnam},
-    url = {https://www.infobooks.org/authors/classic/aristotle-books/#Physic},
-    date = {1929},
-    translator = {Wicksteed, P. H. and Cornford, F. M.},
-    keywords = {primary, ancient, philosophy},
-    langid = {english},
-    langidopts = {variant=american},
-    file = {~/Documents/coding/projects/bibiman/tests/book:aristotle_physic_1929g#p#putna.pdf},
-    annotation = {A \texttt{book} entry with a \texttt{translator} field},
-    abstract = {The Physics is a work by Aristotle dedicated to the study of
-                nature. Regarded by Heidegger as "the fundamental work of Western
-                philosophy", it presents the renowned distinction between the
-                four types of cause, as well as reflections on chance, motion,
-                infinity, and other fundamental concepts. It is here that
-                Aristotle sets out his celebrated paradox of time.},
-}
-
-@book{book:aristotle_poetic_1968clare#press,
-    title = {Poetics},
-    shorttitle = {Poetics},
-    author = {Aristotle},
-    location = {Oxford},
-    publisher = {Clarendon Press},
-    series = {Clarendon {Aristotle}},
-    date = {1968},
-    editor = {Lucas, D. W.},
-    keywords = {primary},
-    langid = {english},
-    langidopts = {variant=british},
-    annotation = {A \texttt{book} entry with an \texttt{author} and an \texttt{
-                  editor} as well as a \texttt{series} field},
-}
-
-@mvbook{mvbook:aristotle_rhetor=aristo=with_1877cambr#unive#press,
-    title = {The \textbf{Rhetoric} of {Aristotle} with a commentary by the late {Edward
-             Meredith Cope}},
-    shorttitle = {Rhetoric},
-    author = {Aristotle},
-    publisher = cup,
-    date = {1877},
-    editor = {Cope, Edward Meredith},
-    commentator = {Cope, Edward Meredith},
-    volumes = {3},
-    keywords = {primary},
-    langid = {english},
-    langidopts = {variant=british},
-    sorttitle = {Rhetoric of Aristotle},
-    indextitle = {Rhetoric of {Aristotle}, The},
-    annotation = {A commented edition. Note the concatenation of the \texttt{
-                  editor} and \texttt{commentator} fields as well as the \texttt{
-                  volumes}, \texttt{sorttitle}, and \texttt{indextitle} fields},
-}
-
-@book{book:augustine_hetero=cataly=synthe_1995marce#dekke,
-    title = {Heterogeneous catalysis for the synthetic \textit{chemist}},
-    shorttitle = {Heterogeneous catalysis},
-    author = {Augustine, Robert L.},
-    location = {New York},
-    publisher = {Marcel Dekker},
-    date = {1995},
-    langid = {english},
-    langidopts = {variant=american},
-    annotation = {A plain \texttt{book} entry},
-    keywords = {chemistry},
-}
-
-@book{book:averroes_epistl=on=possib_1982jewis#theol#semin#ameri,
-    title = {The Epistle on the Possibility of Conjunction with the Active
-             Intellect by {Ibn Rushd} with the Commentary of {Moses Narboni}},
-    shorttitle = {Possibility of Conjunction},
-    author = {Averroes},
-    location = {New York},
-    publisher = {Jewish Theological Seminary of America},
-    series = {Moreshet: Studies in {Jewish} History, Literature and Thought},
-    number = {7},
-    date = {1982},
-    editor = {Bland, Kalman P.},
-    translator = {Bland, Kalman P.},
-    keywords = {primary},
-    langid = {english},
-    langidopts = {variant=american},
-    indextitle = {Epistle on the Possibility of Conjunction, The},
-    annotation = {A \texttt{book} entry with a \texttt{series} and a \texttt{
-                  number}. Note the concatenation of the \texttt{editor} and
-                  \texttt{translator} fields as well as the \texttt{indextitle}
-                  field},
-}
-
-@article{article:baez-lauda_higher=algebr=v_2004,
-    title = {Higher-Dimensional Algebra {V}: 2-Groups},
-    author = {Baez, John C. and Lauda, Aaron D.},
-    volume = {12},
-    pages = {423--491},
-    journaltitle = {Theory and Applications of Categories},
-    date = {2004},
-    version = {3},
-    eprint = {math/0307200v3},
-    eprinttype = {arxiv},
-    langid = {english},
-    keywords = {math},
-    langidopts = {variant=american},
-    annotation = {An \texttt{article} with \texttt{eprint} and \texttt{
-                  eprinttype} fields. Note that the arXiv reference is
-                  transformed into a clickable link if \texttt{hyperref} support
-                  has been enabled. Compare \texttt{baez\slash online}, which is
-                  the same item given as an \texttt{online} entry},
-}
-
-@article{article:bertram-wentworth_gromov=invari=holomo_1996,
-    title = {Gromov invariants for holomorphic maps on {Riemann} surfaces},
-    shorttitle = {Gromov invariants},
-    author = {Bertram, Aaron and Wentworth, Richard},
-    volume = {9},
-    number = {2},
-    pages = {529--571},
-    journaltitle = jams,
-    date = {1996},
-    langid = {english},
-    langidopts = {variant=american},
-    annotation = {An \texttt{article} entry with a \texttt{volume} and a \texttt
-                  {number} field},
-}
-
-@article{article:doody_heming=style=jakes_1974,
-    title = {Hemingway's Style and {Jake's} Narration},
-    author = {Doody, Terrence},
-    year = {1974},
-    journal = {The Journal of Narrative Technique},
-    volume = {4},
-    number = {3},
-    pages = {212--225},
-    langid = {english},
-    langidopts = {variant=american},
-    related = {matuz:article:doody_heming=style=jakes_1974},
-    relatedstring = {\autocap{e}xcerpt in},
-    annotation = {An \texttt{article} entry cited as an excerpt from a \texttt{
-                  collection} entry. Note the format of the \texttt{related} and
-                  \texttt{relatedstring} fields},
-}
-
-@article{article:gillies_herder=prepar=goethe_1933,
-    title = {Herder and the Preparation of {Goethe's} Idea of World Literature},
-    author = {Gillies, Alexander},
-    series = {newseries},
-    volume = {9},
-    pages = {46--67},
-    journaltitle = {Publications of the English Goethe Society},
-    date = {1933},
-    langid = {english},
-    langidopts = {variant=british},
-    annotation = {An \texttt{article} entry with a \texttt{series} and a \texttt
-                  {volume} field. Note that format of the \texttt{series} field
-                  in the database file},
-}
-
-@article{article:glashow_partia=symmet=weak_1961,
-    title = {Partial Symmetries of Weak Interactions},
-    author = {Glashow, Sheldon},
-    volume = {22},
-    pages = {579--588},
-    journaltitle = {Nucl.~Phys.},
-    date = {1961},
-}
-
-@article{article:herrmann-ofele_carboc=carben=as_2006,
-    title = {A carbocyclic carbene as an efficient catalyst ligand for {C--C}
-             coupling reactions},
-    author = {Herrmann, Wolfgang A. and {\"O}fele, Karl and Schneider, Sabine K.
-              and Herdtweck, Eberhardt and Hoffmann, Stephan D.},
-    volume = {45},
-    number = {23},
-    pages = {3859--3862},
-    journaltitle = anch-ie,
-    date = {2006},
-    indextitle = {Carbocyclic carbene as an efficient catalyst, A},
-}
-
-@article{article:hostetler-wingate_alkane=gold=cluste_1998,
-    title = {Alkanethiolate gold cluster molecules with core diameters from 1.5
-             to 5.2~{nm}},
-    shorttitle = {Alkanethiolate gold cluster molecules},
-    author = {Hostetler, Michael J. and Wingate, Julia E. and Zhong, Chuan-Jian
-              and Harris, Jay E. and Vachet, Richard W. and Clark, Michael R. and
-              Londono, J. David and Green, Stephen J. and Stokes, Jennifer J. and
-              Wignall, George D. and Glish, Gary L. and Porter, Marc D. and Evans
-              , Neal D. and Murray, Royce W.},
-    volume = {14},
-    number = {1},
-    pages = {17--30},
-    journaltitle = {Langmuir},
-    date = {1998},
-    subtitle = {Core and monolayer properties as a function of core size},
-    langid = {english},
-    langidopts = {variant=american},
-    indextitle = {Alkanethiolate gold cluster molecules},
-    annotation = {An \texttt{article} entry with \arabic{author} authors. By
-                  default, long author and editor lists are automatically
-                  truncated. This is configurable},
-}
-
-@article{article:kastenholz-hunenberger_comput=method=ionic_2006,
-    title = {Computation of methodology\hyphen independent ionic solvation free
-             energies from molecular simulations},
-    author = {Kastenholz, M. A. and H{\"u}nenberger, Philippe H.},
-    volume = {124},
-    doi = {10.1063/1.2172593},
-    journaltitle = jchph,
-    date = {2006},
-    subtitle = {{I}. {The} electrostatic potential in molecular liquids},
-    eid = {124106},
-    langid = {english},
-    langidopts = {variant=american},
-    indextitle = {Computation of ionic solvation free energies},
-    annotation = {An \texttt{article} entry with an \texttt{eid} and a \texttt{
-                  doi} field. Note that the \textsc{doi} is transformed into a
-                  clickable link if \texttt{hyperref} support has been enabled},
-    abstract = {The computation of \texttt{ionic} solvation free energies from atomistic
-                simulations is a surprisingly difficult problem that has found no
-                satisfactory solution for more than 15 years. The reason is that
-                the charging free energies evaluated from such simulations are
-                affected by very large errors. One of these is related to the
-                choice of a specific convention for summing up the contributions
-                of solvent charges to the electrostatic potential in the ionic
-                cavity, namely, on the basis of point charges within entire
-                solvent molecules (M scheme) or on the basis of individual point
-                charges (P scheme). The use of an inappropriate convention may
-                lead to a charge-independent offset in the calculated potential,
-                which depends on the details of the summation scheme, on the
-                quadrupole-moment trace of the solvent molecule, and on the
-                approximate form used to represent electrostatic interactions in
-                the system. However, whether the M or P scheme (if any)
-                represents the appropriate convention is still a matter of
-                on-going debate. The goal of the present article is to settle
-                this long-standing controversy by carefully analyzing (both
-                analytically and numerically) the properties of the electrostatic
-                potential in molecular liquids (and inside cavities within them).
-                },
-}
-
-@article{article:sarfraz-razzak_techni=sectio=algori_2002,
-    title = {Technical section: {An} algorithm for automatic capturing of the
-             font outlines},
-    author = {M. Sarfraz and M. F. A. Razzak},
-    year = {2002},
-    journal = {Computers and Graphics},
-    volume = {26},
-    number = {5},
-    pages = {795--804},
-    issn = {0097-8493},
-    annotation = {An \texttt{article} entry with an \texttt{issn} field},
-}
-
-@article{article:reese_georgi=anglos=diplom_1958,
-    title = {Georgia in {Anglo-Spanish} Diplomacy, 1736--1739},
-    author = {Reese, Trevor R.},
-    series = {3},
-    volume = {15},
-    pages = {168--190},
-    journaltitle = {William and Mary Quarterly},
-    date = {1958},
-    langid = {english},
-    langidopts = {variant=american},
-    annotation = {An \texttt{article} entry with a \texttt{series} and a \texttt
-                  {volume} field. Note the format of the series. If the value of
-                  the \texttt{series} field is an integer, this number is printed
-                  as an ordinal and the string \enquote*{series} is appended
-                  automatically},
-}
-
-@article{article:shore_twiceb=once=concei_1991,
-    title = {Twice-Born, Once Conceived},
-    author = {Shore, Bradd},
-    series = {newseries},
-    volume = {93},
-    number = {1},
-    pages = {9--27},
-    journaltitle = {American Anthropologist},
-    date = {1991-03},
-    subtitle = {Meaning Construction and Cultural Cognition},
-    annotation = {An \texttt{article} entry with \texttt{series}, \texttt{volume
-                  }, and \texttt{number} fields. Note the format of the \texttt{
-                  series} which is a localization key},
-}
-
-@article{article:sigfridsson-ryde_compar=method=derivi_1998,
-    title = {Comparison of methods for deriving atomic charges from the
-             electrostatic potential and moments},
-    author = {Sigfridsson, Emma and Ryde, Ulf},
-    volume = {19},
-    number = {4},
-    pages = {377--395},
-    doi = {10.1002/(SICI)1096-987X(199803)19:4<377::AID-JCC1>3.0.CO;2-P},
-    journaltitle = {Journal of Computational Chemistry},
-    date = {1998},
-    langid = {english},
-    langidopts = {variant=american},
-    indextitle = {Methods for deriving atomic charges},
-    annotation = {An \texttt{article} entry with \texttt{volume}, \texttt{number
-                  }, and \texttt{doi} fields. Note that the \textsc{doi} is
-                  transformed into a clickable link if \texttt{hyperref} support
-                  has been enabled},
-    abstract = {Four methods for deriving partial atomic charges from the
-                quantum chemical electrostatic potential (CHELP, CHELPG,
-                Merz-Kollman, and RESP) have been compared and critically
-                evaluated. It is shown that charges strongly depend on how and
-                where the potential points are selected. Two alternative methods
-                are suggested to avoid the arbitrariness in the point-selection
-                schemes and van der Waals exclusion radii: CHELP-BOW, which also
-                estimates the charges from the electrostatic potential, but with
-                potential points that are Boltzmann-weighted after their
-                occurrence in actual simulations using the energy function of the
-                program in which the charges will be used, and CHELMO, which
-                estimates the charges directly from the electrostatic multipole
-                moments. Different criteria for the quality of the charges are
-                discussed.},
-}
-
-@article{article:spiegelberg_intent=intent=schola_1969,
-    title = {\mkbibquote{Intention} und \mkbibquote{Intentionalit{\"a}t} in der
-             Scholastik, bei Brentano und Husserl},
-    shorttitle = {Intention und Intentionalit{\"a}t},
-    author = {Spiegelberg, Herbert},
-    volume = {29},
-    pages = {189--216},
-    journaltitle = {Studia Philosophica},
-    date = {1969},
-    langid = {german},
-    sorttitle = {Intention und Intentionalitat in der Scholastik, bei Brentano
-                 und Husserl},
-    indexsorttitle = {Intention und Intentionalitat in der Scholastik, bei
-                      Brentano und Husserl},
-    annotation = {An \texttt{article} entry. Note the \texttt{sorttitle} and
-                  \texttt{indexsorttitle} fields and the markup of the quotes in
-                  the database file},
-}
-
-@article{article:springer_mediae=pilgri=routes_1950,
-    title = {Mediaeval Pilgrim Routes from {Scandinavia} to {Rome}},
-    shorttitle = {Mediaeval Pilgrim Routes},
-    author = {Springer, Otto},
-    volume = {12},
-    pages = {92--122},
-    journaltitle = {Mediaeval Studies},
-    date = {1950},
-    langid = {english},
-    langidopts = {variant=british},
-    annotation = {A plain \texttt{article} entry},
-}
-
-@article{article:weinberg_model=lepton_1967,
-    title = {A Model of Leptons},
-    author = {Weinberg, Steven},
-    volume = {19},
-    pages = {1264--1266},
-    journaltitle = {Phys.~Rev.~Lett.},
-    date = {1967},
-}
-
-@string{anch-ie = {Angew.~Chem. Int.~Ed.}}
-
-@string{cup = {Cambridge University Press}}
-
-@string{dtv = {Deutscher Taschenbuch-Verlag}}
-
-@string{hup = {Harvard University Press}}
-
-@string{jams = {J.~Amer. Math. Soc.}}
-
-@string{jchph = {J.~Chem. Phys.}}
-
-@string{jomch = {J.~Organomet. Chem.}}
-
-@string{pup = {Princeton University Press}}
-
-@incollection{incollection:westfahl_true=fronti,
-    title = {The True Frontier},
-    author = {Westfahl, Gary},
-    pages = {55--65},
-    subtitle = {Confronting and Avoiding the Realities of Space in {American}
-                Science Fiction Films},
-    crossref = {westfahl:frontier},
-    langid = {english},
-    langidopts = {variant=american},
-    indextitle = {True Frontier, The},
-    annotation = {A cross-referenced article from a \texttt{collection}. This is
-                  an \texttt{incollection} entry with a \texttt{crossref} field.
-                  Note the \texttt{subtitle} and \texttt{indextitle} fields},
-}
-
-@article{article:yoon-ryu_pallad=pincer=comple_2006,
-    title = {Palladium pincer complexes with reduced bond angle strain:
-             efficient catalysts for the {Heck} reaction},
-    author = {Yoon, Myeong S. and Ryu, Dowook and Kim, Jeongryul and Ahn, Kyo
-              Han},
-    volume = {25},
-    number = {10},
-    pages = {2409--2411},
-    journaltitle = {Organometallics},
-    date = {2006},
-    indextitle = {Palladium pincer complexes},
-}
-- 
cgit v1.2.3


From b582588b642e8a38956df2b573ae2be51f19e082 Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Tue, 14 Oct 2025 16:50:31 +0200
Subject: correct some typos

---
 src/bibiman/citekeys/citekey_utils.rs | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs
index 61a1804..b8f5600 100644
--- a/src/bibiman/citekeys/citekey_utils.rs
+++ b/src/bibiman/citekeys/citekey_utils.rs
@@ -38,11 +38,12 @@ pub(super) fn formatting_help() {
         ),
         formatdoc!("{}", "USAGE".bold()),
         formatdoc!(
-            "\t{} {} {} {}\n",
+            "\t{} {} {} {} {}\n",
             env!("CARGO_PKG_NAME").fg::<White>().bold(),
             "format-citekeys".bold(),
+            "[OPTIONS]".bold(),
             "--source=<SOURCE>".bold(),
-            "--output=<TARGET>".bold()
+            "[--output=<TARGET>]".bold()
         ),
         formatdoc!(
             "
@@ -96,10 +97,10 @@ pub(super) fn formatting_help() {
         formatdoc!(
             "
                 \t{}
-                \tWhen formatting citekeys also rename all PDFs and notefiles
-                \tfollowing the bibiman citekey-basename scheme at the locations
-                \tset in the config file. This option can break file paths. Try
-                \twith {} first!
+                \tWhen this option is set, bibiman will also rename all PDFs and 
+                \tnotefiles following the bibiman citekey-basename scheme at the 
+                \tlocations set in the config file. This option can break file paths. 
+                \tTry with {} first!
             ",
             "-u, --update-attachments".fg::<White>().bold(),
             "--dry-run".bold()
-- 
cgit v1.2.3


From 2dc231247757a9a80b1925ed215f53f54eececa5 Mon Sep 17 00:00:00 2001
From: lukeflo
Date: Wed, 15 Oct 2025 07:28:20 +0200
Subject: fix tests, remove unneeded imports, add description

---
 src/bibiman/citekeys.rs               | 6 +++---
 src/bibiman/citekeys/citekey_utils.rs | 3 ---
 src/config.rs                         | 1 +
 3 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index 8f70ab0..fdeed14 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -241,9 +241,9 @@ impl<'a> CitekeyFormatting<'a> {
     /// That will prevent the replacement longer key parts that equal a full shorter
     /// key.
     ///
-    /// You are **very encouraged** to call this method before `update_file()` to
-    /// prevent replacing citekeys partly which afterwards wont match the pattern
-    /// anymore.
+    /// You are **strongly encouraged** to call this method before `update_file()`
+    /// or `update_notes_pdfs()` to prevent partial replacement of citekeys, which
+    /// afterwards won't match the pattern anymore.
     pub fn rev_sort_new_keys_by_len(mut self) -> Self {
         self.old_new_keys_map
             .sort_by(|a, b| b.0.len().cmp(&a.0.len()));
diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs
index b8f5600..773a2d2 100644
--- a/src/bibiman/citekeys/citekey_utils.rs
+++ b/src/bibiman/citekeys/citekey_utils.rs
@@ -15,10 +15,7 @@
 // along with this program.  If not, see <https://www.gnu.org/licenses/>.
 /////
 
-use std::sync::LazyLock;
-
 use biblatex::{ChunksExt, Entry, Type};
-use deunicode::deunicode;
 use indoc::formatdoc;
 use owo_colors::{
     OwoColorize,
diff --git a/src/config.rs b/src/config.rs
index b8d8b45..47e145c 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -501,6 +501,7 @@ mod tests {
                     year_color = "135"
 
                     [citekey_formatter]
+                    ascii_only = true
                 "#,
             )?;
 
-- 
cgit v1.2.3