aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author lukeflo 2025-10-18 13:03:08 +0200
committer lukeflo 2025-10-18 13:03:08 +0200
commit 231353de2ce5713a4848178f031101534b529442 (patch)
tree f8612664038a10f847c4c5b1ae9d8a2a2144cbf5
parent 05dd5cbc2f5fd8252f9ed3b0e79cf4d785791f98 (diff)
download bibiman-231353de2ce5713a4848178f031101534b529442.tar.gz
bibiman-231353de2ce5713a4848178f031101534b529442.zip
add some additional ignored chars and words, check deunicoded chars for ignoring
-rw-r--r-- CITEKEYS.md 4
-rw-r--r-- src/bibiman/citekeys.rs 3
-rw-r--r-- src/bibiman/citekeys/citekey_utils.rs 34
-rw-r--r-- src/config.rs 134
-rw-r--r-- tests/biblatex-test.bib 2
5 files changed, 107 insertions, 70 deletions
diff --git a/CITEKEYS.md b/CITEKEYS.md
index 828e557..d9855b8 100644
--- a/CITEKEYS.md
+++ b/CITEKEYS.md
@@ -30,7 +30,9 @@ fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ]
case = "lowercase"
ascii_only = true
ignored_chars = [
- "?", "!", "\\", "\'", ".", "-", "–", ":", ",", "[", "]", "(", ")", "{", "}", "§", "$", "%", "&", "/", "`", "´", "#", "+", "*", "=", "|", "<", ">", "^", "°", "_", "\"",
+ '?', '!', "\\", "'", '.', '-', '–', ':', ',', '[', ']', '(', ')', '{', '}', '§', '$', '%',
+ '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"', '»', '«', '‘', '’',
+ '“', '”',
]
ignored_words = [
"the",
diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs
index fdeed14..a56969b 100644
--- a/src/bibiman/citekeys.rs
+++ b/src/bibiman/citekeys.rs
@@ -49,7 +49,8 @@ pub enum CitekeyCase {
alias = "camelcase",
alias = "camel_case",
alias = "uppercamelcase",
- alias = "upper_camel_case"
+ alias = "upper_camel_case",
+ alias = "pascalcase"
)]
Camel,
}
diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs
index 2de3600..3f12f03 100644
--- a/src/bibiman/citekeys/citekey_utils.rs
+++ b/src/bibiman/citekeys/citekey_utils.rs
@@ -175,24 +175,36 @@ pub(super) fn build_citekey(
if counter == 0 && case == Some(&CitekeyCase::Camel) {
c = c.to_ascii_uppercase()
}
- if let Some(len) = max_chars
- && counter >= len
- {
- break 'char_loop;
- }
- // if a word slice contains a special char, skip it
- if ignored_chars.contains(&c) {
- continue 'char_loop;
- }
// if non-ascii chars should be mapped, check if needed and do it
if let Some(chars) = deunicode::deunicode_char(c)
&& ascii_only
{
- word_slice.push_str(chars);
- counter += chars.len();
+ 'deunicode_char_loop: for ch in chars.chars() {
+ // if the deunicoded charset of the word slice's current
+ // char contains a special char, skip it
+ if ignored_chars.contains(&ch) {
+ continue 'deunicode_char_loop;
+ }
+ word_slice.push(ch);
+ counter += 1;
+ if let Some(len) = max_chars
+ && counter >= len
+ {
+ break 'char_loop;
+ }
+ }
} else {
+ // if a word slice contains a special char, skip it
+ if ignored_chars.contains(&c) {
+ continue 'char_loop;
+ }
word_slice.push(c);
counter += 1;
+ if let Some(len) = max_chars
+ && counter >= len
+ {
+ break 'char_loop;
+ }
}
}
// Don't count empty slices and don't add delimiter to those
diff --git a/src/config.rs b/src/config.rs
index 47e145c..e0a097c 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -36,43 +36,49 @@ use crate::{
cliargs::CLIArgs,
};
-pub const IGNORED_SPECIAL_CHARS: [char; 33] = [
+pub const IGNORED_SPECIAL_CHARS: [char; 39] = [
'?', '!', '\\', '\'', '.', '-', '–', ':', ',', '[', ']', '(', ')', '{', '}', '§', '$', '%',
- '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"',
+ '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"', '»', '«', '‘', '’',
+ '“', '”',
];
pub static IGNORED_WORDS: LazyLock<Vec<String>> = LazyLock::new(|| {
vec![
- String::from("the"),
- String::from("a"),
- String::from("an"),
- String::from("of"),
- String::from("for"),
- String::from("in"),
- String::from("at"),
- String::from("to"),
- String::from("and"),
- String::from("him"),
- String::from("her"),
- String::from("his"),
- String::from("hers"),
- String::from("der"),
- String::from("die"),
- String::from("das"),
- String::from("ein"),
- String::from("eine"),
- String::from("eines"),
- String::from("des"),
- String::from("auf"),
- String::from("und"),
- String::from("für"),
- String::from("vor"),
- String::from("er"),
- String::from("sie"),
- String::from("es"),
- String::from("ihm"),
- String::from("ihr"),
- String::from("ihnen"),
+ "the".into(),
+ "a".into(),
+ "an".into(),
+ "of".into(),
+ "for".into(),
+ "in".into(),
+ "at".into(),
+ "to".into(),
+ "and".into(),
+ "him".into(),
+ "her".into(),
+ "his".into(),
+ "he".into(),
+ "she".into(),
+ "it".into(),
+ "hers".into(),
+ "der".into(),
+ "die".into(),
+ "das".into(),
+ "ein".into(),
+ "eine".into(),
+ "eines".into(),
+ "des".into(),
+ "auf".into(),
+ "und".into(),
+ "für".into(),
+ "vor".into(),
+ "er".into(),
+ "sie".into(),
+ "es".into(),
+ "ihm".into(),
+ "ihr".into(),
+ "ihnen".into(),
+ "zum".into(),
+ "dem".into(),
]
});
@@ -159,34 +165,50 @@ const DEFAULT_CONFIG: &str = r##"
# ascii_only = true
## List of special chars that'll be ignored when building citekeys.
-## A custom list will overwrite the default list
+## A custom list will overwrite the default list. Thus, to add to the list,
+## uncomment it and add the additional chars.
# ignored_chars = [
-# "?", "!", "\\", "\'", ".", "-", "–", ":", ",", "[", "]", "(", ")", "{", "}", "§", "$", "%", "&", "/", "`", "´", "#", "+", "*", "=", "|", "<", ">", "^", "°", "_", """,
-# ]
+# '?', '!', "\\", "'", '.', '-', '–', ':', ',', '[', ']', '(', ')', '{', '}', '§', '$', '%', '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"', '»', '«', '‘', '’', '“', '”',
+# ]
## List of words that'll be ignored when building citekeys.
-## A custom list will overwrite the default list
+## A custom list will overwrite the default list. Thus, to add to the list,
+## uncomment it and add the additional words.
# ignored_words = [
-# "the",
-# "a",
-# "an",
-# "of",
-# "for",
-# "in",
-# "at",
-# "to",
-# "and",
-# "der",
-# "die",
-# "das",
-# "ein",
-# "eine",
-# "eines",
-# "des",
-# "auf",
-# "und",
-# "für",
-# "vor",
+# "the",
+# "a",
+# "an",
+# "of",
+# "for",
+# "in",
+# "at",
+# "to",
+# "and",
+# "him",
+# "her",
+# "his",
+# "he",
+# "she",
+# "it",
+# "hers",
+# "der",
+# "die",
+# "das",
+# "ein",
+# "eine",
+# "eines",
+# "des",
+# "auf",
+# "und",
+# "für",
+# "vor",
+# "er",
+# "sie",
+# "es",
+# "ihm",
+# "ihr",
+# "ihnen",
+# "zum",
+# "dem",
# ]
"##;
diff --git a/tests/biblatex-test.bib b/tests/biblatex-test.bib
index 2149e7c..941a24d 100644
--- a/tests/biblatex-test.bib
+++ b/tests/biblatex-test.bib
@@ -40,7 +40,7 @@
}
@article{angenendt,
- title = {In Honore Salvatoris~-- Vom Sinn und Unsinn der Patrozinienkunde},
+ title = {“In Honore Salvatoris”~-- Vom Sinn und Unsinn der Patrozinienkunde},
shorttitle = {In Honore Salvatoris},
author = {Angenendt, Arnold},
volume = {97},