diff options
| author | lukeflo | 2025-10-10 13:47:07 +0200 |
|---|---|---|
| committer | lukeflo | 2025-10-10 13:47:07 +0200 |
| commit | 4779dbc5fe3712bce31bbb5f1f43c28c4c839420 (patch) | |
| tree | 9f96997d5c0bab44584eb648396c7067c94f7db5 | |
| parent | f9548af5c7693edf536b4ad45564a964338e2c2e (diff) | |
| download | bibiman-4779dbc5fe3712bce31bbb5f1f43c28c4c839420.tar.gz bibiman-4779dbc5fe3712bce31bbb5f1f43c28c4c839420.zip | |
replace byte indexing with a char-counting loop, impl `dry-run` option for citekey formatting
| -rw-r--r-- | src/bibiman/citekeys.rs | 118 | ||||
| -rw-r--r-- | tests/test-config.toml | 4 |
2 files changed, 90 insertions, 32 deletions
diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index cafd124..5121741 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -22,7 +22,7 @@ use std::{ }; use biblatex::{Bibliography, ChunksExt, Entry, Type}; -use color_eyre::eyre::eyre; +use color_eyre::eyre::{OptionExt, eyre}; use indoc::formatdoc; use lexopt::Arg::{Long, Short}; use owo_colors::{ @@ -50,6 +50,7 @@ pub(crate) struct CitekeyFormatting { fields: Vec<String>, case: Option<CitekeyCase>, old_new_keys_map: Vec<(String, String)>, + dry_run: bool, } impl CitekeyFormatting { @@ -59,12 +60,27 @@ impl CitekeyFormatting { ) -> color_eyre::Result<()> { let mut formatter = CitekeyFormatting::default(); + formatter.fields = cfg + .citekey_formatter + .fields + .clone() + .ok_or_eyre("Need to define fields correctly in config file")?; + + formatter.case = cfg.citekey_formatter.case.clone(); + + if formatter.fields.is_empty() { + return Err(eyre!( + "To format all citekeys, you need to provide {} values in the config file", + "fields".bold() + )); + } while let Some(arg) = parser.next()? 
{ match arg { Short('h') | Long("help") => { formatting_help(); return Ok(()); } + Short('d') | Long("dry-run") => formatter.dry_run = true, Short('s') | Short('f') | Long("source") | Long("file") => { formatter.bibfile_path.0 = parser.value()?.into() } @@ -75,6 +91,16 @@ impl CitekeyFormatting { } } + let bibstring = std::fs::read_to_string(&formatter.bibfile_path.0)?; + + formatter.bib_entries = Bibliography::parse(&bibstring) + .map_err(|e| eyre!("Couldn't parse bibfile due to {}", e.kind))?; + + formatter + .do_formatting() + .rev_sort_new_keys_by_len() + .update_file()?; + Ok(()) } /// Start Citekey formatting with building a new instance of `CitekeyFormatting` @@ -108,6 +134,7 @@ impl CitekeyFormatting { fields, case: cfg.citekey_formatter.case.clone(), old_new_keys_map: Vec::new(), + dry_run: false, }) } @@ -122,32 +149,40 @@ impl CitekeyFormatting { } self.old_new_keys_map = old_new_keys; + // dbg!(&self.old_new_keys_map); self } /// Write entries with updated citekeys to bibfile - pub fn update_file(&self) -> color_eyre::Result<()> { - let source_file = self.bibfile_path.0.as_path(); - let target_file = if let Some(path) = &self.bibfile_path.1 { - path + pub fn update_file(&mut self) -> color_eyre::Result<()> { + if self.dry_run { + println!("Following citekeys would be formatted: old => new\n"); + self.old_new_keys_map.sort_by(|a, b| a.0.cmp(&b.0)); + for (old, new) in &self.old_new_keys_map { + println!("{} => {}", old.italic(), new.bold()) + } } else { - source_file - }; - let mut content = std::fs::read_to_string(source_file)?; - - for (old_key, new_key) in self.old_new_keys_map.iter() { - content = content.replace(old_key, new_key); - } + let source_file = self.bibfile_path.0.as_path(); + let target_file = if let Some(path) = &self.bibfile_path.1 { + path + } else { + source_file + }; + let mut content = std::fs::read_to_string(source_file)?; - let mut new_file = OpenOptions::new() - .truncate(true) - .write(true) - .create(true) - 
.open(target_file)?; + for (old_key, new_key) in self.old_new_keys_map.iter() { + content = content.replace(old_key, new_key); + } - new_file.write_all(content.as_bytes())?; + let mut new_file = OpenOptions::new() + .truncate(true) + .write(true) + .create(true) + .open(target_file)?; + new_file.write_all(content.as_bytes())?; + } Ok(()) } @@ -160,7 +195,7 @@ impl CitekeyFormatting { /// anymore. pub fn rev_sort_new_keys_by_len(&mut self) -> &mut Self { self.old_new_keys_map - .sort_by(|a, b| b.1.len().cmp(&a.1.len())); + .sort_by(|a, b| b.0.len().cmp(&a.0.len())); self } } @@ -232,14 +267,20 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey let field = preformat_field(field, entry); let mut split_field = field.split_whitespace(); let mut words_passed = 0; - let word_count = if let Some(val) = word_count { + let field_count = field.split_whitespace().count(); + let word_count = if let Some(val) = word_count + && val <= field_count + { val } else { - field.split_whitespace().count() + field_count }; loop { + if field.is_empty() { + break; + } if let Some(field_slice) = split_field.next() { - formatted_str = formatted_str + format_word(field_slice, char_count); + formatted_str = formatted_str + &format_word(field_slice, char_count); words_passed += 1; if word_count == words_passed { formatted_str = formatted_str + trailing_delimiter.unwrap_or(""); @@ -270,7 +311,7 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey fn preformat_field(field: &str, entry: &Entry) -> String { match field { "title" => { - sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("NA".into())) + sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("".into())) } "author" => { if let Ok(authors) = entry.author() { @@ -278,21 +319,20 @@ fn preformat_field(field: &str, entry: &Entry) -> String { for a in authors.iter() { last_names = last_names + &a.name + " "; } - dbg!(&last_names); last_names } 
else { - "NA".to_string() + "".to_string() } } "year" => { if let Ok(date) = entry.date() { date.to_chunks().format_verbatim()[..4].to_string() } else { - entry.get_as::<String>(field).unwrap_or("NA".into()) + entry.get_as::<String>(field).unwrap_or("".into()) } } "subtitle" => { - sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("NA".into())) + sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("".into())) } "editor" => { if let Ok(editors) = entry.editors() { @@ -304,22 +344,35 @@ fn preformat_field(field: &str, entry: &Entry) -> String { } last_names } else { - "NA".to_string() + "".to_string() } } "pubtype" | "entrytype" => entry.entry_type.to_string(), - _ => entry.get_as::<String>(field).unwrap_or("Empty".into()), + _ => entry.get_as::<String>(field).unwrap_or("".into()), } } /// Cut of word at char count index if its set -fn format_word(word: &str, count: Option<usize>) -> &str { +fn format_word(word: &str, count: Option<usize>) -> String { if let Some(len) = count && len < word.chars().count() { - &word[..len] + // Since chars can consist of multiple bytes, we need this more complex + // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...) 
+ // instead of simple byte indexing + let mut word_slice = String::new(); + let word_chars = word.chars(); + let mut counter = 0; + for c in word_chars { + if counter == len { + break; + } + word_slice.push(c); + counter += 1; + } + word_slice } else { - word + word.to_string() } } @@ -427,6 +480,7 @@ mod tests { ], case: None, old_new_keys_map: Vec::new(), + dry_run: false, }; let _ = formatting_struct.do_formatting(); assert_eq!( diff --git a/tests/test-config.toml b/tests/test-config.toml index fc447f1..b484b69 100644 --- a/tests/test-config.toml +++ b/tests/test-config.toml @@ -59,3 +59,7 @@ custom_column = "series" # author_color = "38" # title_color = "37" # year_color = "135" + +[citekey_formatter] +fields = [ "author;2;;-;_", "title;3;3;_;_", "year" ] +case = "lowercase" |
