aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author lukeflo 2025-10-10 13:47:07 +0200
committer lukeflo 2025-10-10 13:47:07 +0200
commit 4779dbc5fe3712bce31bbb5f1f43c28c4c839420 (patch)
tree 9f96997d5c0bab44584eb648396c7067c94f7db5
parent f9548af5c7693edf536b4ad45564a964338e2c2e (diff)
download bibiman-4779dbc5fe3712bce31bbb5f1f43c28c4c839420.tar.gz
bibiman-4779dbc5fe3712bce31bbb5f1f43c28c4c839420.zip
replace byte indexing with a char counting loop, impl `dry-run` option for citekey formatting
-rw-r--r-- src/bibiman/citekeys.rs 118
-rw-r--r-- tests/test-config.toml 4
2 files changed, 90 insertions, 32 deletions
diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index cafd124..5121741 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -22,7 +22,7 @@ use std::{
};
use biblatex::{Bibliography, ChunksExt, Entry, Type};
-use color_eyre::eyre::eyre;
+use color_eyre::eyre::{OptionExt, eyre};
use indoc::formatdoc;
use lexopt::Arg::{Long, Short};
use owo_colors::{
@@ -50,6 +50,7 @@ pub(crate) struct CitekeyFormatting {
fields: Vec<String>,
case: Option<CitekeyCase>,
old_new_keys_map: Vec<(String, String)>,
+ dry_run: bool,
}
impl CitekeyFormatting {
@@ -59,12 +60,27 @@ impl CitekeyFormatting {
) -> color_eyre::Result<()> {
let mut formatter = CitekeyFormatting::default();
+ formatter.fields = cfg
+ .citekey_formatter
+ .fields
+ .clone()
+ .ok_or_eyre("Need to define fields correctly in config file")?;
+
+ formatter.case = cfg.citekey_formatter.case.clone();
+
+ if formatter.fields.is_empty() {
+ return Err(eyre!(
+ "To format all citekeys, you need to provide {} values in the config file",
+ "fields".bold()
+ ));
+ }
while let Some(arg) = parser.next()? {
match arg {
Short('h') | Long("help") => {
formatting_help();
return Ok(());
}
+ Short('d') | Long("dry-run") => formatter.dry_run = true,
Short('s') | Short('f') | Long("source") | Long("file") => {
formatter.bibfile_path.0 = parser.value()?.into()
}
@@ -75,6 +91,16 @@ impl CitekeyFormatting {
}
}
+ let bibstring = std::fs::read_to_string(&formatter.bibfile_path.0)?;
+
+ formatter.bib_entries = Bibliography::parse(&bibstring)
+ .map_err(|e| eyre!("Couldn't parse bibfile due to {}", e.kind))?;
+
+ formatter
+ .do_formatting()
+ .rev_sort_new_keys_by_len()
+ .update_file()?;
+
Ok(())
}
/// Start Citekey formatting with building a new instance of `CitekeyFormatting`
@@ -108,6 +134,7 @@ impl CitekeyFormatting {
fields,
case: cfg.citekey_formatter.case.clone(),
old_new_keys_map: Vec::new(),
+ dry_run: false,
})
}
@@ -122,32 +149,40 @@ impl CitekeyFormatting {
}
self.old_new_keys_map = old_new_keys;
+ // dbg!(&self.old_new_keys_map);
self
}
/// Write entries with updated citekeys to bibfile
- pub fn update_file(&self) -> color_eyre::Result<()> {
- let source_file = self.bibfile_path.0.as_path();
- let target_file = if let Some(path) = &self.bibfile_path.1 {
- path
+ pub fn update_file(&mut self) -> color_eyre::Result<()> {
+ if self.dry_run {
+ println!("Following citekeys would be formatted: old => new\n");
+ self.old_new_keys_map.sort_by(|a, b| a.0.cmp(&b.0));
+ for (old, new) in &self.old_new_keys_map {
+ println!("{} => {}", old.italic(), new.bold())
+ }
} else {
- source_file
- };
- let mut content = std::fs::read_to_string(source_file)?;
-
- for (old_key, new_key) in self.old_new_keys_map.iter() {
- content = content.replace(old_key, new_key);
- }
+ let source_file = self.bibfile_path.0.as_path();
+ let target_file = if let Some(path) = &self.bibfile_path.1 {
+ path
+ } else {
+ source_file
+ };
+ let mut content = std::fs::read_to_string(source_file)?;
- let mut new_file = OpenOptions::new()
- .truncate(true)
- .write(true)
- .create(true)
- .open(target_file)?;
+ for (old_key, new_key) in self.old_new_keys_map.iter() {
+ content = content.replace(old_key, new_key);
+ }
- new_file.write_all(content.as_bytes())?;
+ let mut new_file = OpenOptions::new()
+ .truncate(true)
+ .write(true)
+ .create(true)
+ .open(target_file)?;
+ new_file.write_all(content.as_bytes())?;
+ }
Ok(())
}
@@ -160,7 +195,7 @@ impl CitekeyFormatting {
/// anymore.
pub fn rev_sort_new_keys_by_len(&mut self) -> &mut Self {
self.old_new_keys_map
- .sort_by(|a, b| b.1.len().cmp(&a.1.len()));
+ .sort_by(|a, b| b.0.len().cmp(&a.0.len()));
self
}
}
@@ -232,14 +267,20 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey
let field = preformat_field(field, entry);
let mut split_field = field.split_whitespace();
let mut words_passed = 0;
- let word_count = if let Some(val) = word_count {
+ let field_count = field.split_whitespace().count();
+ let word_count = if let Some(val) = word_count
+ && val <= field_count
+ {
val
} else {
- field.split_whitespace().count()
+ field_count
};
loop {
+ if field.is_empty() {
+ break;
+ }
if let Some(field_slice) = split_field.next() {
- formatted_str = formatted_str + format_word(field_slice, char_count);
+ formatted_str = formatted_str + &format_word(field_slice, char_count);
words_passed += 1;
if word_count == words_passed {
formatted_str = formatted_str + trailing_delimiter.unwrap_or("");
@@ -270,7 +311,7 @@ fn build_citekey(entry: &Entry, pattern_fields: &[String], case: Option<&Citekey
fn preformat_field(field: &str, entry: &Entry) -> String {
match field {
"title" => {
- sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("NA".into()))
+ sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("".into()))
}
"author" => {
if let Ok(authors) = entry.author() {
@@ -278,21 +319,20 @@ fn preformat_field(field: &str, entry: &Entry) -> String {
for a in authors.iter() {
last_names = last_names + &a.name + " ";
}
- dbg!(&last_names);
last_names
} else {
- "NA".to_string()
+ "".to_string()
}
}
"year" => {
if let Ok(date) = entry.date() {
date.to_chunks().format_verbatim()[..4].to_string()
} else {
- entry.get_as::<String>(field).unwrap_or("NA".into())
+ entry.get_as::<String>(field).unwrap_or("".into())
}
}
"subtitle" => {
- sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("NA".into()))
+ sanitize_single_string_fully(&entry.get_as::<String>(field).unwrap_or("".into()))
}
"editor" => {
if let Ok(editors) = entry.editors() {
@@ -304,22 +344,35 @@ fn preformat_field(field: &str, entry: &Entry) -> String {
}
last_names
} else {
- "NA".to_string()
+ "".to_string()
}
}
"pubtype" | "entrytype" => entry.entry_type.to_string(),
- _ => entry.get_as::<String>(field).unwrap_or("Empty".into()),
+ _ => entry.get_as::<String>(field).unwrap_or("".into()),
}
}
/// Cut off word at char count index if it's set
-fn format_word(word: &str, count: Option<usize>) -> &str {
+fn format_word(word: &str, count: Option<usize>) -> String {
if let Some(len) = count
&& len < word.chars().count()
{
- &word[..len]
+ // Since chars can consist of multiple bytes, we need this more complex
+ // loop to collect a specified number of chars (e.g. ÄÖÜäöü¢æø etc...)
+ // instead of simple byte indexing
+ let mut word_slice = String::new();
+ let word_chars = word.chars();
+ let mut counter = 0;
+ for c in word_chars {
+ if counter == len {
+ break;
+ }
+ word_slice.push(c);
+ counter += 1;
+ }
+ word_slice
} else {
- word
+ word.to_string()
}
}
@@ -427,6 +480,7 @@ mod tests {
],
case: None,
old_new_keys_map: Vec::new(),
+ dry_run: false,
};
let _ = formatting_struct.do_formatting();
assert_eq!(
diff --git a/tests/test-config.toml b/tests/test-config.toml
index fc447f1..b484b69 100644
--- a/tests/test-config.toml
+++ b/tests/test-config.toml
@@ -59,3 +59,7 @@ custom_column = "series"
# author_color = "38"
# title_color = "37"
# year_color = "135"
+
+[citekey_formatter]
+fields = [ "author;2;;-;_", "title;3;3;_;_", "year" ]
+case = "lowercase"