about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author lukeflo 2025-10-12 23:01:17 +0200
committer lukeflo 2025-10-12 23:01:27 +0200
commit 0a8805acfb6fbb3d3a8c22f4ccbaf692a73cddfb (patch)
tree 3009e5c32985690cc1b346f4688fa3e9e3da7fde
parent f112c4e13009e5ddfe3cf5c4cbe7f29f832b8553 (diff)
download bibiman-0a8805acfb6fbb3d3a8c22f4ccbaf692a73cddfb.tar.gz
bibiman-0a8805acfb6fbb3d3a8c22f4ccbaf692a73cddfb.zip
ignore list for words, but need to solve inner delimiter problem for words ignored
-rw-r--r-- src/bibiman/citekeys.rs 317
-rw-r--r-- src/bibiman/citekeys/citekey_utils.rs 327
-rw-r--r-- tests/test-config.toml 7
3 files changed, 348 insertions, 303 deletions
diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index 9d17403..2f56947 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -21,27 +21,33 @@ use std::{
path::{Path, PathBuf},
};
-use biblatex::{Bibliography, ChunksExt, Entry, Type};
+use biblatex::Bibliography;
use color_eyre::eyre::{OptionExt, eyre};
-use indoc::formatdoc;
use lexopt::Arg::{Long, Short};
-use owo_colors::{
- OwoColorize,
- colors::{BrightBlue, Green, White},
-};
+use owo_colors::OwoColorize;
use serde::{Deserialize, Serialize};
use crate::{
- bibiman::sanitize::sanitize_single_string_fully,
- config::{BibiConfig, IGNORED_SPECIAL_CHARS},
+ bibiman::citekeys::citekey_utils::{build_citekey, formatting_help},
+ config::BibiConfig,
};
+mod citekey_utils;
+
+/// Case handling that can be requested for generated citekeys.
+///
+/// The serde aliases on each variant are the alternative spellings accepted
+/// when the value is deserialized.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum CitekeyCase {
+ /// The finished citekey is converted to uppercase.
#[serde(alias = "uppercase", alias = "upper")]
Upper,
+ /// The finished citekey is converted to lowercase.
#[serde(alias = "lowercase", alias = "lower")]
Lower,
+ /// The first char of every word is upper-cased while the citekey is built;
+ /// the finished citekey is not converted any further.
+ #[serde(
+ alias = "camel",
+ alias = "camelcase",
+ alias = "camel_case",
+ alias = "uppercamelcase",
+ alias = "upper_camel_case"
+ )]
+ Camel,
}
#[derive(Debug, Default, Clone)]
@@ -203,306 +209,13 @@ impl CitekeyFormatting {
}
}
-fn formatting_help() {
- let help = vec![
- formatdoc!(
- "{} {}\n",
- env!("CARGO_PKG_NAME").fg::<Green>().bold(),
- env!("CARGO_PKG_VERSION")
- ),
- formatdoc!("{}", "USAGE".bold()),
- formatdoc!(
- "\t{} {} {} {}\n",
- env!("CARGO_PKG_NAME").fg::<White>().bold(),
- "format-citekeys".bold(),
- "--source=<SOURCE>".bold(),
- "--output=<TARGET>".bold()
- ),
- formatdoc!(
- "
- \tThis help describes the CLI usage for the citekey formatting
- \tfunctionality of bibiman. The definition of patterns how the
- \tcitekeys should be formatted must be set in the config file.
- \tFor further informations how to use this patterns etc. see:
- \t{}
- ",
- "https://codeberg.org/lukeflo/bibiman/src/branch/main#bibiman"
- .italic()
- .fg::<BrightBlue>()
- ),
- formatdoc!("{}", "OPTIONS".bold()),
- formatdoc!(
- "
- \t{}
- \tShow this help and exit
- ",
- "-h, --help".fg::<White>().bold()
- ),
- formatdoc!(
- "
- \t{}
- \tDon't apply any changes to the named files. Instead print all
- \told citekeys and the formatted strings that would have been
- \tapplied in the format: {} => {}
- ",
- "-d, --dry-run".fg::<White>().bold(),
- "old_key".italic(),
- "new_key".bold()
- ),
- formatdoc! {"
- \t{}
- \tThe bibfile for which the citekey formatting should be processed.
- \tTakes a path as argument.
- ", "-s, -f, --source=, --file=".fg::<White>().bold()},
- formatdoc!(
- "
- \t{}
- \tThe bibfile to which the updated content should be written.
- \tTakes a path as argument. If the file doesn't exist, it will be
- \tcreated.
- \tIf the argument isn't used, the original file will be {}!
- ",
- "-t, -o, --target=, --output=".fg::<White>().bold(),
- "overwritten".italic(),
- ),
- ];
- let help = help.join("\n");
- println!("{}", help);
-}
-
-/// Build the citekey from the patterns defined in the config file
-fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&CitekeyCase>) -> String {
- // mut string the citekey is built from
- let mut new_citekey = String::new();
-
- // count different fields of pattern vec
- let fields = pattern_fields.len();
-
- // loop over pattern fields process them
- for (idx, pattern) in pattern_fields.iter().enumerate() {
- // parse single values from pattern field
- let (field_name, word_count, char_count, inner_delimiter, trailing_delimiter) =
- split_formatting_pat(pattern);
-
- // built the part of the citekey from the current pattern field
- let formatted_field_str = {
- let mut formatted_str = String::new();
-
- // preformat the field depending on biblatex value
- let field = preformat_field(field_name, entry);
-
- // split at whitespaces, count fields and set counter for processed
- // splits
- let mut split_field = field.split_whitespace();
- let mut words_passed = 0;
- let field_count = field.split_whitespace().count();
- let word_count = if let Some(val) = word_count
- && val <= field_count
- {
- val
- } else {
- field_count
- };
-
- // loop over single parts of current field and add correct delimiter
- loop {
- // terminate loop for current field if its empty. If its also the
- // last of the pattern vec, pop the trailing delimiter
- if field.is_empty() {
- if idx + 1 == fields {
- let _ = new_citekey.pop();
- }
- break;
- }
-
- // process the single slices and add correct delimiter
- if let Some(field_slice) = split_field.next() {
- // Create word slice char by char. We need to loop over chars
- // instead of a simple bytes index to also catch chars which
- // consist of more than one byte (äöüøæ etc...)
- let mut word_slice = String::new();
- let word_chars = field_slice.chars();
- let mut counter = 0;
- for c in word_chars {
- if let Some(len) = char_count
- && counter == len
- {
- break;
- }
- // if a word slice contains a special char, skip it
- if IGNORED_SPECIAL_CHARS.contains(&c) {
- continue;
- }
- word_slice.push(c);
- counter += 1;
- }
- // Don't count empty slices and don't add delimiter to those
- if !word_slice.is_empty() {
- formatted_str = formatted_str + &word_slice;
- words_passed += 1;
- if word_count == words_passed {
- formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
- break;
- } else {
- formatted_str = formatted_str + inner_delimiter.unwrap_or("");
- }
- } else {
- continue;
- }
- } else {
- formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
- break;
- };
- }
- formatted_str
- };
- new_citekey = new_citekey + &formatted_field_str;
- }
- if let Some(case_format) = case {
- match case_format {
- CitekeyCase::Lower => new_citekey.to_lowercase(),
- CitekeyCase::Upper => new_citekey.to_uppercase(),
- }
- } else {
- new_citekey
- }
-}
-
-/// Preformat some fields which are very common to be used in citekeys
-fn preformat_field(field: &str, entry: &Entry) -> String {
- match field {
- "title" => {
- sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("".into()))
- }
- "author" => {
- if let Ok(authors) = entry.author() {
- let mut last_names = String::new();
- for a in authors.iter() {
- last_names = last_names + &a.name + " ";
- }
- last_names
- } else {
- "".to_string()
- }
- }
- "year" => {
- if let Ok(date) = entry.date() {
- date.to_chunks().format_verbatim()[..4].to_string()
- } else {
- entry.get_as::<String>(field).unwrap_or("".into())
- }
- }
- "subtitle" => {
- sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("".into()))
- }
- "editor" => {
- if let Ok(editors) = entry.editors() {
- let mut last_names = String::new();
- for editortypes in editors.iter() {
- for e in editortypes.0.iter() {
- last_names = last_names + &e.name + " ";
- }
- }
- last_names
- } else {
- "".to_string()
- }
- }
- "pubtype" | "entrytype" => entry.entry_type.to_string(),
- _ => entry.get_as::<String>(field).unwrap_or("".into()),
- }
-}
-
-/// Cut of word at char count index if its set
-fn format_word(word: &str, count: Option<usize>) -> String {
- // Since chars can consist of multiple bytes, we need this more complex
- // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...)
- // instead of simple byte indexing
- let mut word_slice = String::new();
- let word_chars = word.chars();
- let mut counter = 0;
- for c in word_chars {
- if let Some(len) = count
- && counter == len
- {
- break;
- }
- if IGNORED_SPECIAL_CHARS.contains(&c) {
- continue;
- }
- word_slice.push(c);
- counter += 1;
- }
- word_slice
-}
-
-/// Split a formatting pattern of kind
-/// `<field>;<word count>;<char count>;<inside delimiter>;<trailing delimiter>`,
-/// e.g.: `title;3;3;_;:` will give `("title", 3, 3, "_", ":")`
-fn split_formatting_pat(
- pattern: &str,
-) -> (
- &str,
- Option<usize>,
- Option<usize>,
- Option<&str>,
- Option<&str>,
-) {
- let mut splits = pattern.split(';');
- (
- splits
- .next()
- .expect("Need field value for formatting citekey"),
- if let Some(next) = splits.next()
- && next.len() > 0
- {
- next.parse::<usize>().ok()
- } else {
- None
- },
- if let Some(next) = splits.next()
- && next.len() > 0
- {
- next.parse::<usize>().ok()
- } else {
- None
- },
- splits.next(),
- splits.next(),
- )
-}
-
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use biblatex::Bibliography;
- use itertools::Itertools;
-
- use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting, split_formatting_pat};
-
- #[test]
- fn split_citekey_pattern() {
- let pattern = "title;3;5;_;_";
-
- assert_eq!(
- split_formatting_pat(pattern),
- ("title", Some(3), Some(5), Some("_"), Some("_"))
- );
- let pattern = "year";
-
- assert_eq!(
- split_formatting_pat(pattern),
- ("year", None, None, None, None)
- );
-
- let pattern = "author;1;;;_";
- assert_eq!(
- split_formatting_pat(pattern),
- ("author", Some(1), None, Some(""), Some("_"))
- );
- }
+ use crate::bibiman::citekeys::{CitekeyCase, CitekeyFormatting};
#[test]
fn format_citekey_test() {
diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs
new file mode 100644
index 0000000..ee2c849
--- /dev/null
+++ b/src/bibiman/citekeys/citekey_utils.rs
@@ -0,0 +1,327 @@
+// bibiman - a TUI for managing BibLaTeX databases
+// Copyright (C) 2025 lukeflo
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+/////
+
+use biblatex::{ChunksExt, Entry, Type};
+use indoc::formatdoc;
+use owo_colors::{
+ OwoColorize,
+ colors::{BrightBlue, Green, White},
+};
+
+use crate::{
+ bibiman::{citekeys::CitekeyCase, sanitize::sanitize_single_string_fully},
+ config::IGNORED_SPECIAL_CHARS,
+};
+
+// NOTE(review): nothing in this file references the list yet. Presumably these
+// words are meant to be skipped when a citekey is built - confirm once the
+// ignore logic is implemented.
+/// Lowercase English and German function words (articles, prepositions and
+/// conjunctions).
+const IGNORE_WORDS: [&str; 20] = [
+ "the", "a", "an", "of", "for", "in", "at", "to", "and", "der", "die", "das", "ein", "eine",
+ "eines", "des", "auf", "und", "für", "vor",
+];
+
+/// Print the CLI help for the `format-citekeys` subcommand to stdout.
+///
+/// The help consists of individually formatted sections (version banner, usage,
+/// description and one paragraph per option). The sections are joined with a
+/// newline and printed in one go.
+pub(super) fn formatting_help() {
+    // Package name and version
+    let banner = formatdoc!(
+        "{} {}\n",
+        env!("CARGO_PKG_NAME").fg::<Green>().bold(),
+        env!("CARGO_PKG_VERSION")
+    );
+
+    let usage_heading = "USAGE".bold().to_string();
+    let usage = formatdoc!(
+        "\t{} {} {} {}\n",
+        env!("CARGO_PKG_NAME").fg::<White>().bold(),
+        "format-citekeys".bold(),
+        "--source=<SOURCE>".bold(),
+        "--output=<TARGET>".bold()
+    );
+
+    // General description with a link to the full documentation
+    let description = formatdoc!(
+        "
+        \tThis help describes the CLI usage for the citekey formatting
+        \tfunctionality of bibiman. The definition of patterns how the
+        \tcitekeys should be formatted must be set in the config file.
+        \tFor further informations how to use this patterns etc. see:
+        \t{}
+        ",
+        "https://codeberg.org/lukeflo/bibiman/src/branch/main#bibiman"
+            .italic()
+            .fg::<BrightBlue>()
+    );
+
+    let options_heading = "OPTIONS".bold().to_string();
+    let help_option = formatdoc!(
+        "
+        \t{}
+        \tShow this help and exit
+        ",
+        "-h, --help".fg::<White>().bold()
+    );
+    let dry_run_option = formatdoc!(
+        "
+        \t{}
+        \tDon't apply any changes to the named files. Instead print all
+        \told citekeys and the formatted strings that would have been
+        \tapplied in the format: {} => {}
+        ",
+        "-d, --dry-run".fg::<White>().bold(),
+        "old_key".italic(),
+        "new_key".bold()
+    );
+    let source_option = formatdoc!(
+        "
+        \t{}
+        \tThe bibfile for which the citekey formatting should be processed.
+        \tTakes a path as argument.
+        ",
+        "-s, -f, --source=, --file=".fg::<White>().bold()
+    );
+    let target_option = formatdoc!(
+        "
+        \t{}
+        \tThe bibfile to which the updated content should be written.
+        \tTakes a path as argument. If the file doesn't exist, it will be
+        \tcreated.
+        \tIf the argument isn't used, the original file will be {}!
+        ",
+        "-t, -o, --target=, --output=".fg::<White>().bold(),
+        "overwritten".italic()
+    );
+
+    // Order of the sections is the order they appear on screen
+    let sections = [
+        banner,
+        usage_heading,
+        usage,
+        description,
+        options_heading,
+        help_option,
+        dry_run_option,
+        source_option,
+        target_option,
+    ];
+    println!("{}", sections.join("\n"));
+}
+
+/// Build the citekey from the patterns defined in the config file.
+///
+/// Every element of `pattern_fields` is split with `split_formatting_pat` into
+/// field name, word count, char count, inner delimiter and trailing delimiter.
+/// The field value is fetched through `preformat_field`, split at whitespace
+/// and every word is shortened to the char count with all chars listed in
+/// `IGNORED_SPECIAL_CHARS` dropped.
+///
+/// * Fields with an empty value are skipped completely.
+/// * The trailing delimiter of a field is only written if another non-empty
+///   field follows, so the citekey never ends with a trailing delimiter.
+/// * The inner delimiter is only written between two words that are not empty
+///   after filtering, so a field never ends with an inner delimiter.
+/// * `case` is applied last: `Lower`/`Upper` convert the whole citekey, while
+///   `Camel` upper-cases the first char of every word and leaves the rest as is.
+pub(super) fn build_citekey(
+    entry: &Entry,
+    pattern_fields: &[String],
+    case: Option<&CitekeyCase>,
+) -> String {
+    // mut string the citekey is built from
+    let mut new_citekey = String::new();
+
+    // trailing delimiter of the last non-empty field; written as soon as the
+    // next non-empty field shows up
+    let mut pending_delimiter: Option<&str> = None;
+
+    let camel_case = case == Some(&CitekeyCase::Camel);
+
+    for pattern in pattern_fields {
+        // parse single values from pattern field
+        let (field_name, word_count, char_count, inner_delimiter, cur_trailing_delimiter) =
+            split_formatting_pat(pattern);
+
+        // preformat the field depending on biblatex value; an empty field
+        // contributes nothing and keeps the pending delimiter untouched
+        let field = preformat_field(field_name, entry);
+        if field.is_empty() {
+            continue;
+        }
+
+        if let Some(delimiter) = pending_delimiter {
+            new_citekey.push_str(delimiter);
+        }
+        pending_delimiter = cur_trailing_delimiter;
+
+        // A word count larger than the number of words is capped, a missing
+        // one means "all words". A count of 0 never matches the counter below
+        // and therefore also yields all words.
+        let available_words = field.split_whitespace().count();
+        let word_limit = word_count
+            .filter(|&count| count <= available_words)
+            .unwrap_or(available_words);
+
+        let inner_delimiter = inner_delimiter.unwrap_or("");
+        let mut words_added = 0;
+        for raw_word in field.split_whitespace() {
+            let word = format_word(raw_word, char_count, camel_case);
+            // Don't count empty slices and don't add delimiter to those
+            if word.is_empty() {
+                continue;
+            }
+            // Writing the delimiter *before* every word but the first one
+            // guarantees that no delimiter is left dangling if all remaining
+            // words turn out to be empty
+            if words_added > 0 {
+                new_citekey.push_str(inner_delimiter);
+            }
+            new_citekey.push_str(&word);
+            words_added += 1;
+            if words_added == word_limit {
+                break;
+            }
+        }
+    }
+
+    match case {
+        Some(CitekeyCase::Lower) => new_citekey.to_lowercase(),
+        Some(CitekeyCase::Upper) => new_citekey.to_uppercase(),
+        // camel case was already applied word by word
+        Some(CitekeyCase::Camel) | None => new_citekey,
+    }
+}
+
+/// Cut a word down to at most `char_count` chars and drop every char listed in
+/// `IGNORED_SPECIAL_CHARS`. If `capitalize` is set, the first kept char is
+/// converted to uppercase.
+fn format_word(word: &str, char_count: Option<usize>, capitalize: bool) -> String {
+    // We need to loop over chars instead of a simple bytes index to also
+    // catch chars which consist of more than one byte (äöüøæ etc...)
+    let mut word_slice = String::new();
+    let mut kept = 0;
+    for c in word.chars() {
+        if char_count == Some(kept) {
+            break;
+        }
+        let c = if capitalize && kept == 0 {
+            uppercase_char(c)
+        } else {
+            c
+        };
+        // if a word slice contains a special char, skip it
+        if IGNORED_SPECIAL_CHARS.contains(&c) {
+            continue;
+        }
+        word_slice.push(c);
+        kept += 1;
+    }
+    word_slice
+}
+
+/// Uppercase a single char, including non-ASCII letters like `ä` or `ø`.
+/// Chars whose uppercase form consists of several chars (e.g. `ß`) are
+/// returned unchanged to keep the char count of the word stable.
+fn uppercase_char(c: char) -> char {
+    let mut upper = c.to_uppercase();
+    match (upper.next(), upper.next()) {
+        (Some(single), None) => single,
+        _ => c,
+    }
+}
+
+/// Preformat some fields which are very common to be used in citekeys.
+///
+/// Returns the value of `field` for `entry` as plain string; an empty string
+/// is returned if the value can't be retrieved.
+///
+/// * `title`, `subtitle`: value with all macro code sanitized
+/// * `author`: `name` part of all authors, each followed by a space; falls
+///   back to the editors if the authors can't be retrieved
+/// * `editor`: `name` part of all editors, each followed by a space
+/// * `year`: first four chars of the date, falls back to the `year` field
+/// * `pubtype`, `entrytype`: the entry type
+/// * everything else: the unprocessed field value
+pub(super) fn preformat_field(field: &str, entry: &Entry) -> String {
+    match field {
+        // Sanitize all macro code from string
+        "title" | "subtitle" => {
+            sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or_default())
+        }
+        // Get author names. Fall back to editors before setting empty string
+        "author" => match entry.author() {
+            Ok(authors) => authors.iter().map(|a| format!("{} ", a.name)).collect(),
+            Err(_) => editor_names(entry),
+        },
+        // Get year of date field, fallback to year field
+        "year" => match entry.date() {
+            // Take chars instead of slicing bytes: a byte range panics if the
+            // formatted date is shorter than four bytes or if the cut hits
+            // the middle of a multi-byte char
+            Ok(date) => date
+                .to_chunks()
+                .format_verbatim()
+                .chars()
+                .take(4)
+                .collect(),
+            Err(_) => entry.get_as::<String>(field).unwrap_or_default(),
+        },
+        "editor" => editor_names(entry),
+        "pubtype" | "entrytype" => entry.entry_type.to_string(),
+        _ => entry.get_as::<String>(field).unwrap_or_default(),
+    }
+}
+
+/// Collect the `name` part of all editors of all editor types, each followed
+/// by a space. Returns an empty string if the editors can't be retrieved.
+fn editor_names(entry: &Entry) -> String {
+    entry
+        .editors()
+        .map(|editors| {
+            editors
+                .iter()
+                .flat_map(|editortypes| editortypes.0.iter())
+                .map(|e| format!("{} ", e.name))
+                .collect()
+        })
+        .unwrap_or_default()
+}
+
+/// Split a formatting pattern of kind
+/// `<field>;<word count>;<char count>;<inside delimiter>;<trailing delimiter>`
+/// into its parts, e.g.: `title;3;3;_;:` will give
+/// `("title", Some(3), Some(3), Some("_"), Some(":"))`.
+///
+/// Parts which are missing at the end of the pattern are `None`. Counts are
+/// also `None` if they are empty or can't be parsed as `usize`; an empty
+/// delimiter is returned as `Some("")`.
+pub(super) fn split_formatting_pat(
+    pattern: &str,
+) -> (
+    &str,
+    Option<usize>,
+    Option<usize>,
+    Option<&str>,
+    Option<&str>,
+) {
+    // An empty string fails to parse just like any other non-number, thus no
+    // separate check for emptiness is needed
+    let parse_count = |raw: Option<&str>| raw.and_then(|number| number.parse::<usize>().ok());
+
+    let mut parts = pattern.split(';');
+
+    // `split` yields at least one (possibly empty) item
+    let field_name = parts
+        .next()
+        .expect("Need field value for formatting citekey");
+    let word_count = parse_count(parts.next());
+    let char_count = parse_count(parts.next());
+    let inner_delimiter = parts.next();
+    let trailing_delimiter = parts.next();
+
+    (
+        field_name,
+        word_count,
+        char_count,
+        inner_delimiter,
+        trailing_delimiter,
+    )
+}
+
+#[cfg(test)]
+mod test {
+    use crate::bibiman::citekeys::citekey_utils::split_formatting_pat;
+
+    /// Full pattern, pattern with field name only and pattern with empty
+    /// parts in the middle
+    #[test]
+    fn split_citekey_pattern() {
+        let cases = [
+            (
+                "title;3;5;_;_",
+                ("title", Some(3), Some(5), Some("_"), Some("_")),
+            ),
+            ("year", ("year", None, None, None, None)),
+            (
+                "author;1;;;_",
+                ("author", Some(1), None, Some(""), Some("_")),
+            ),
+        ];
+
+        for (pattern, expected) in cases {
+            assert_eq!(split_formatting_pat(pattern), expected);
+        }
+    }
+}
diff --git a/tests/test-config.toml b/tests/test-config.toml
index 2c5ac96..d3e42c5 100644
--- a/tests/test-config.toml
+++ b/tests/test-config.toml
@@ -61,5 +61,10 @@ custom_column = "series"
# year_color = "135"
[citekey_formatter]
-fields = [ "author;2;;-;_", "title;3;6;_;_", "year", "publisher;;5;#;" ]
+fields = ["entrytype;;;;:", "author;2;;-;_", "title;3;6;_;_", "year", "publisher;;5;#;" ]
+# fields = [ # CamelCase test
+# "author;2;;;",
+# "title;5;5;;",
+# "year"
+# ]
case = "lowercase"